// generator.hpp | |
// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) | |
// | |
// Distributed under the Boost Software License, Version 1.0. (See accompanying | |
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
#ifndef BOOST_LEXER_GENERATOR_HPP | |
#define BOOST_LEXER_GENERATOR_HPP | |
#include "char_traits.hpp" | |
// memcmp() | |
#include <cstring> | |
#include "partition/charset.hpp" | |
#include "partition/equivset.hpp" | |
#include <memory> | |
#include "parser/tree/node.hpp" | |
#include "parser/parser.hpp" | |
#include "containers/ptr_list.hpp" | |
#include "rules.hpp" | |
#include "state_machine.hpp" | |
namespace boost | |
{ | |
namespace lexer | |
{ | |
template<typename CharT, typename Traits = char_traits<CharT> > | |
class basic_generator | |
{ | |
public: | |
typedef typename detail::internals::size_t_vector size_t_vector; | |
typedef basic_rules<CharT> rules; | |
static void build (const rules &rules_, | |
basic_state_machine<CharT> &state_machine_) | |
{ | |
std::size_t index_ = 0; | |
std::size_t size_ = rules_.statemap ().size (); | |
node_ptr_vector node_ptr_vector_; | |
detail::internals &internals_ = const_cast<detail::internals &> | |
(state_machine_.data ()); | |
bool seen_BOL_assertion_ = false; | |
bool seen_EOL_assertion_ = false; | |
state_machine_.clear (); | |
for (; index_ < size_; ++index_) | |
{ | |
internals_._lookup->push_back (static_cast<size_t_vector *>(0)); | |
internals_._lookup->back () = new size_t_vector; | |
internals_._dfa_alphabet.push_back (0); | |
internals_._dfa->push_back (static_cast<size_t_vector *>(0)); | |
internals_._dfa->back () = new size_t_vector; | |
} | |
for (index_ = 0, size_ = internals_._lookup->size (); | |
index_ < size_; ++index_) | |
{ | |
internals_._lookup[index_]->resize (sizeof (CharT) == 1 ? | |
num_chars : num_wchar_ts, dead_state_index); | |
if (!rules_.regexes ()[index_].empty ()) | |
{ | |
// vector mapping token indexes to partitioned token index sets | |
index_set_vector set_mapping_; | |
// syntax tree | |
detail::node *root_ = build_tree (rules_, index_, | |
node_ptr_vector_, internals_, set_mapping_); | |
build_dfa (root_, set_mapping_, | |
internals_._dfa_alphabet[index_], | |
*internals_._dfa[index_]); | |
if (internals_._seen_BOL_assertion) | |
{ | |
seen_BOL_assertion_ = true; | |
} | |
if (internals_._seen_EOL_assertion) | |
{ | |
seen_EOL_assertion_ = true; | |
} | |
internals_._seen_BOL_assertion = false; | |
internals_._seen_EOL_assertion = false; | |
} | |
} | |
internals_._seen_BOL_assertion = seen_BOL_assertion_; | |
internals_._seen_EOL_assertion = seen_EOL_assertion_; | |
} | |
static void minimise (basic_state_machine<CharT> &state_machine_) | |
{ | |
detail::internals &internals_ = const_cast<detail::internals &> | |
(state_machine_.data ()); | |
const std::size_t machines_ = internals_._dfa->size (); | |
for (std::size_t i_ = 0; i_ < machines_; ++i_) | |
{ | |
const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[i_]; | |
size_t_vector *dfa_ = internals_._dfa[i_]; | |
if (dfa_alphabet_ != 0) | |
{ | |
std::size_t size_ = 0; | |
do | |
{ | |
size_ = dfa_->size (); | |
minimise_dfa (dfa_alphabet_, *dfa_, size_); | |
} while (dfa_->size () != size_); | |
} | |
} | |
} | |
protected: | |
typedef detail::basic_charset<CharT> charset; | |
typedef detail::ptr_list<charset> charset_list; | |
typedef std::auto_ptr<charset> charset_ptr; | |
typedef detail::equivset equivset; | |
typedef detail::ptr_list<equivset> equivset_list; | |
typedef std::auto_ptr<equivset> equivset_ptr; | |
typedef typename charset::index_set index_set; | |
typedef std::vector<index_set> index_set_vector; | |
typedef detail::basic_parser<CharT> parser; | |
typedef typename parser::node_ptr_vector node_ptr_vector; | |
typedef std::set<const detail::node *> node_set; | |
typedef detail::ptr_vector<node_set> node_set_vector; | |
typedef std::vector<const detail::node *> node_vector; | |
typedef detail::ptr_vector<node_vector> node_vector_vector; | |
typedef typename parser::string string; | |
typedef std::pair<string, string> string_pair; | |
typedef typename parser::tokeniser::string_token string_token; | |
typedef std::deque<string_pair> macro_deque; | |
typedef std::pair<string, const detail::node *> macro_pair; | |
typedef typename parser::macro_map::iterator macro_iter; | |
typedef std::pair<macro_iter, bool> macro_iter_pair; | |
typedef typename parser::tokeniser::token_map token_map; | |
static detail::node *build_tree (const rules &rules_, | |
const std::size_t state_, node_ptr_vector &node_ptr_vector_, | |
detail::internals &internals_, index_set_vector &set_mapping_) | |
{ | |
size_t_vector *lookup_ = internals_._lookup[state_]; | |
const typename rules::string_deque_deque ®exes_ = | |
rules_.regexes (); | |
const typename rules::id_vector_deque &ids_ = rules_.ids (); | |
const typename rules::id_vector_deque &unique_ids_ = | |
rules_.unique_ids (); | |
const typename rules::id_vector_deque &states_ = rules_.states (); | |
typename rules::string_deque::const_iterator regex_iter_ = | |
regexes_[state_].begin (); | |
typename rules::string_deque::const_iterator regex_iter_end_ = | |
regexes_[state_].end (); | |
typename rules::id_vector::const_iterator ids_iter_ = | |
ids_[state_].begin (); | |
typename rules::id_vector::const_iterator unique_ids_iter_ = | |
unique_ids_[state_].begin (); | |
typename rules::id_vector::const_iterator states_iter_ = | |
states_[state_].begin (); | |
const typename rules::string ®ex_ = *regex_iter_; | |
// map of regex charset tokens (strings) to index | |
token_map token_map_; | |
const typename rules::string_pair_deque ¯odeque_ = | |
rules_.macrodeque (); | |
typename parser::macro_map macromap_; | |
typename detail::node::node_vector tree_vector_; | |
build_macros (token_map_, macrodeque_, macromap_, | |
rules_.flags (), rules_.locale (), node_ptr_vector_, | |
internals_._seen_BOL_assertion, internals_._seen_EOL_assertion); | |
detail::node *root_ = parser::parse (regex_.c_str (), | |
regex_.c_str () + regex_.size (), *ids_iter_, *unique_ids_iter_, | |
*states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_, | |
macromap_, token_map_, internals_._seen_BOL_assertion, | |
internals_._seen_EOL_assertion); | |
++regex_iter_; | |
++ids_iter_; | |
++unique_ids_iter_; | |
++states_iter_; | |
tree_vector_.push_back (root_); | |
// build syntax trees | |
while (regex_iter_ != regex_iter_end_) | |
{ | |
// re-declare var, otherwise we perform an assignment..! | |
const typename rules::string ®ex_ = *regex_iter_; | |
root_ = parser::parse (regex_.c_str (), | |
regex_.c_str () + regex_.size (), *ids_iter_, | |
*unique_ids_iter_, *states_iter_, rules_.flags (), | |
rules_.locale (), node_ptr_vector_, macromap_, token_map_, | |
internals_._seen_BOL_assertion, | |
internals_._seen_EOL_assertion); | |
tree_vector_.push_back (root_); | |
++regex_iter_; | |
++ids_iter_; | |
++unique_ids_iter_; | |
++states_iter_; | |
} | |
if (internals_._seen_BOL_assertion) | |
{ | |
// Fixup BOLs | |
typename detail::node::node_vector::iterator iter_ = | |
tree_vector_.begin (); | |
typename detail::node::node_vector::iterator end_ = | |
tree_vector_.end (); | |
for (; iter_ != end_; ++iter_) | |
{ | |
fixup_bol (*iter_, node_ptr_vector_); | |
} | |
} | |
// join trees | |
{ | |
typename detail::node::node_vector::iterator iter_ = | |
tree_vector_.begin (); | |
typename detail::node::node_vector::iterator end_ = | |
tree_vector_.end (); | |
if (iter_ != end_) | |
{ | |
root_ = *iter_; | |
++iter_; | |
} | |
for (; iter_ != end_; ++iter_) | |
{ | |
node_ptr_vector_->push_back (static_cast<detail::selection_node *>(0)); | |
node_ptr_vector_->back () = new detail::selection_node | |
(root_, *iter_); | |
root_ = node_ptr_vector_->back (); | |
} | |
} | |
// partitioned token list | |
charset_list token_list_; | |
set_mapping_.resize (token_map_.size ()); | |
partition_tokens (token_map_, token_list_); | |
typename charset_list::list::const_iterator iter_ = | |
token_list_->begin (); | |
typename charset_list::list::const_iterator end_ = | |
token_list_->end (); | |
std::size_t index_ = 0; | |
for (; iter_ != end_; ++iter_, ++index_) | |
{ | |
const charset *cs_ = *iter_; | |
typename charset::index_set::const_iterator set_iter_ = | |
cs_->_index_set.begin (); | |
typename charset::index_set::const_iterator set_end_ = | |
cs_->_index_set.end (); | |
fill_lookup (cs_->_token, lookup_, index_); | |
for (; set_iter_ != set_end_; ++set_iter_) | |
{ | |
set_mapping_[*set_iter_].insert (index_); | |
} | |
} | |
internals_._dfa_alphabet[state_] = token_list_->size () + dfa_offset; | |
return root_; | |
} | |
static void build_macros (token_map &token_map_, | |
const macro_deque ¯odeque_, | |
typename parser::macro_map ¯omap_, const regex_flags flags_, | |
const std::locale &locale_, node_ptr_vector &node_ptr_vector_, | |
bool &seen_BOL_assertion_, bool &seen_EOL_assertion_) | |
{ | |
for (typename macro_deque::const_iterator iter_ = | |
macrodeque_.begin (), end_ = macrodeque_.end (); | |
iter_ != end_; ++iter_) | |
{ | |
const typename rules::string &name_ = iter_->first; | |
const typename rules::string ®ex_ = iter_->second; | |
detail::node *node_ = parser::parse (regex_.c_str (), | |
regex_.c_str () + regex_.size (), 0, 0, 0, flags_, | |
locale_, node_ptr_vector_, macromap_, token_map_, | |
seen_BOL_assertion_, seen_EOL_assertion_); | |
macro_iter_pair map_iter_ = macromap_. | |
insert (macro_pair (name_, static_cast<const detail::node *> | |
(0))); | |
map_iter_.first->second = node_; | |
} | |
} | |
static void build_dfa (detail::node *root_, | |
const index_set_vector &set_mapping_, const std::size_t dfa_alphabet_, | |
size_t_vector &dfa_) | |
{ | |
typename detail::node::node_vector *followpos_ = | |
&root_->firstpos (); | |
node_set_vector seen_sets_; | |
node_vector_vector seen_vectors_; | |
size_t_vector hash_vector_; | |
// 'jam' state | |
dfa_.resize (dfa_alphabet_, 0); | |
closure (followpos_, seen_sets_, seen_vectors_, | |
hash_vector_, dfa_alphabet_, dfa_); | |
std::size_t *ptr_ = 0; | |
for (std::size_t index_ = 0; index_ < seen_vectors_->size (); ++index_) | |
{ | |
equivset_list equiv_list_; | |
build_equiv_list (seen_vectors_[index_], set_mapping_, equiv_list_); | |
for (typename equivset_list::list::const_iterator iter_ = | |
equiv_list_->begin (), end_ = equiv_list_->end (); | |
iter_ != end_; ++iter_) | |
{ | |
equivset *equivset_ = *iter_; | |
const std::size_t transition_ = closure | |
(&equivset_->_followpos, seen_sets_, seen_vectors_, | |
hash_vector_, dfa_alphabet_, dfa_); | |
if (transition_ != npos) | |
{ | |
ptr_ = &dfa_.front () + ((index_ + 1) * dfa_alphabet_); | |
// Prune abstemious transitions from end states. | |
if (*ptr_ && !equivset_->_greedy) continue; | |
for (typename detail::equivset::index_vector::const_iterator | |
equiv_iter_ = equivset_->_index_vector.begin (), | |
equiv_end_ = equivset_->_index_vector.end (); | |
equiv_iter_ != equiv_end_; ++equiv_iter_) | |
{ | |
const std::size_t index_ = *equiv_iter_; | |
if (index_ == bol_token) | |
{ | |
if (ptr_[eol_index] == 0) | |
{ | |
ptr_[bol_index] = transition_; | |
} | |
} | |
else if (index_ == eol_token) | |
{ | |
if (ptr_[bol_index] == 0) | |
{ | |
ptr_[eol_index] = transition_; | |
} | |
} | |
else | |
{ | |
ptr_[index_ + dfa_offset] = transition_; | |
} | |
} | |
} | |
} | |
} | |
} | |
static std::size_t closure (typename detail::node::node_vector *followpos_, | |
node_set_vector &seen_sets_, node_vector_vector &seen_vectors_, | |
size_t_vector &hash_vector_, const std::size_t size_, | |
size_t_vector &dfa_) | |
{ | |
bool end_state_ = false; | |
std::size_t id_ = 0; | |
std::size_t unique_id_ = npos; | |
std::size_t state_ = 0; | |
std::size_t hash_ = 0; | |
if (followpos_->empty ()) return npos; | |
std::size_t index_ = 0; | |
std::auto_ptr<node_set> set_ptr_ (new node_set); | |
std::auto_ptr<node_vector> vector_ptr_ (new node_vector); | |
for (typename detail::node::node_vector::const_iterator iter_ = | |
followpos_->begin (), end_ = followpos_->end (); | |
iter_ != end_; ++iter_) | |
{ | |
closure_ex (*iter_, end_state_, id_, unique_id_, state_, | |
set_ptr_.get (), vector_ptr_.get (), hash_); | |
} | |
bool found_ = false; | |
typename size_t_vector::const_iterator hash_iter_ = | |
hash_vector_.begin (); | |
typename size_t_vector::const_iterator hash_end_ = | |
hash_vector_.end (); | |
typename node_set_vector::vector::const_iterator set_iter_ = | |
seen_sets_->begin (); | |
for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_) | |
{ | |
found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_; | |
++index_; | |
if (found_) break; | |
} | |
if (!found_) | |
{ | |
seen_sets_->push_back (static_cast<node_set *>(0)); | |
seen_sets_->back () = set_ptr_.release (); | |
seen_vectors_->push_back (static_cast<node_vector *>(0)); | |
seen_vectors_->back () = vector_ptr_.release (); | |
hash_vector_.push_back (hash_); | |
// State 0 is the jam state... | |
index_ = seen_sets_->size (); | |
const std::size_t old_size_ = dfa_.size (); | |
dfa_.resize (old_size_ + size_, 0); | |
if (end_state_) | |
{ | |
dfa_[old_size_] |= end_state; | |
dfa_[old_size_ + id_index] = id_; | |
dfa_[old_size_ + unique_id_index] = unique_id_; | |
dfa_[old_size_ + state_index] = state_; | |
} | |
} | |
return index_; | |
} | |
static void closure_ex (detail::node *node_, bool &end_state_, | |
std::size_t &id_, std::size_t &unique_id_, std::size_t &state_, | |
node_set *set_ptr_, node_vector *vector_ptr_, std::size_t &hash_) | |
{ | |
const bool temp_end_state_ = node_->end_state (); | |
if (temp_end_state_) | |
{ | |
if (!end_state_) | |
{ | |
end_state_ = true; | |
id_ = node_->id (); | |
unique_id_ = node_->unique_id (); | |
state_ = node_->lexer_state (); | |
} | |
} | |
if (set_ptr_->insert (node_).second) | |
{ | |
vector_ptr_->push_back (node_); | |
hash_ += reinterpret_cast<std::size_t> (node_); | |
} | |
} | |
static void partition_tokens (const token_map &map_, | |
charset_list &lhs_) | |
{ | |
charset_list rhs_; | |
fill_rhs_list (map_, rhs_); | |
if (!rhs_->empty ()) | |
{ | |
typename charset_list::list::iterator iter_; | |
typename charset_list::list::iterator end_; | |
charset_ptr overlap_ (new charset); | |
lhs_->push_back (static_cast<charset *>(0)); | |
lhs_->back () = rhs_->front (); | |
rhs_->pop_front (); | |
while (!rhs_->empty ()) | |
{ | |
charset_ptr r_ (rhs_->front ()); | |
rhs_->pop_front (); | |
iter_ = lhs_->begin (); | |
end_ = lhs_->end (); | |
while (!r_->empty () && iter_ != end_) | |
{ | |
typename charset_list::list::iterator l_iter_ = iter_; | |
(*l_iter_)->intersect (*r_.get (), *overlap_.get ()); | |
if (overlap_->empty ()) | |
{ | |
++iter_; | |
} | |
else if ((*l_iter_)->empty ()) | |
{ | |
delete *l_iter_; | |
*l_iter_ = overlap_.release (); | |
// VC++ 6 Hack: | |
charset_ptr temp_overlap_ (new charset); | |
overlap_ = temp_overlap_; | |
++iter_; | |
} | |
else if (r_->empty ()) | |
{ | |
delete r_.release (); | |
r_ = overlap_; | |
// VC++ 6 Hack: | |
charset_ptr temp_overlap_ (new charset); | |
overlap_ = temp_overlap_; | |
break; | |
} | |
else | |
{ | |
iter_ = lhs_->insert (++iter_, | |
static_cast<charset *>(0)); | |
*iter_ = overlap_.release (); | |
// VC++ 6 Hack: | |
charset_ptr temp_overlap_ (new charset); | |
overlap_ = temp_overlap_; | |
++iter_; | |
end_ = lhs_->end (); | |
} | |
} | |
if (!r_->empty ()) | |
{ | |
lhs_->push_back (static_cast<charset *>(0)); | |
lhs_->back () = r_.release (); | |
} | |
} | |
} | |
} | |
static void fill_rhs_list (const token_map &map_, | |
charset_list &list_) | |
{ | |
typename parser::tokeniser::token_map::const_iterator iter_ = | |
map_.begin (); | |
typename parser::tokeniser::token_map::const_iterator end_ = | |
map_.end (); | |
for (; iter_ != end_; ++iter_) | |
{ | |
list_->push_back (static_cast<charset *>(0)); | |
list_->back () = new charset (iter_->first, iter_->second); | |
} | |
} | |
static void fill_lookup (const string_token &token_, | |
size_t_vector *lookup_, const std::size_t index_) | |
{ | |
const CharT *curr_ = token_._charset.c_str (); | |
const CharT *chars_end_ = curr_ + token_._charset.size (); | |
std::size_t *ptr_ = &lookup_->front (); | |
const std::size_t max_ = sizeof (CharT) == 1 ? | |
num_chars : num_wchar_ts; | |
if (token_._negated) | |
{ | |
CharT curr_char_ = sizeof (CharT) == 1 ? -128 : 0; | |
std::size_t i_ = 0; | |
while (curr_ < chars_end_) | |
{ | |
while (*curr_ > curr_char_) | |
{ | |
ptr_[static_cast<typename Traits::index_type> | |
(curr_char_)] = index_ + dfa_offset; | |
++curr_char_; | |
++i_; | |
} | |
++curr_char_; | |
++curr_; | |
++i_; | |
} | |
for (; i_ < max_; ++i_) | |
{ | |
ptr_[static_cast<typename Traits::index_type>(curr_char_)] = | |
index_ + dfa_offset; | |
++curr_char_; | |
} | |
} | |
else | |
{ | |
while (curr_ < chars_end_) | |
{ | |
ptr_[static_cast<typename Traits::index_type>(*curr_)] = | |
index_ + dfa_offset; | |
++curr_; | |
} | |
} | |
} | |
static void build_equiv_list (const node_vector *vector_, | |
const index_set_vector &set_mapping_, equivset_list &lhs_) | |
{ | |
equivset_list rhs_; | |
fill_rhs_list (vector_, set_mapping_, rhs_); | |
if (!rhs_->empty ()) | |
{ | |
typename equivset_list::list::iterator iter_; | |
typename equivset_list::list::iterator end_; | |
equivset_ptr overlap_ (new equivset); | |
lhs_->push_back (static_cast<equivset *>(0)); | |
lhs_->back () = rhs_->front (); | |
rhs_->pop_front (); | |
while (!rhs_->empty ()) | |
{ | |
equivset_ptr r_ (rhs_->front ()); | |
rhs_->pop_front (); | |
iter_ = lhs_->begin (); | |
end_ = lhs_->end (); | |
while (!r_->empty () && iter_ != end_) | |
{ | |
typename equivset_list::list::iterator l_iter_ = iter_; | |
(*l_iter_)->intersect (*r_.get (), *overlap_.get ()); | |
if (overlap_->empty ()) | |
{ | |
++iter_; | |
} | |
else if ((*l_iter_)->empty ()) | |
{ | |
delete *l_iter_; | |
*l_iter_ = overlap_.release (); | |
// VC++ 6 Hack: | |
equivset_ptr temp_overlap_ (new equivset); | |
overlap_ = temp_overlap_; | |
++iter_; | |
} | |
else if (r_->empty ()) | |
{ | |
delete r_.release (); | |
r_ = overlap_; | |
// VC++ 6 Hack: | |
equivset_ptr temp_overlap_ (new equivset); | |
overlap_ = temp_overlap_; | |
break; | |
} | |
else | |
{ | |
iter_ = lhs_->insert (++iter_, | |
static_cast<equivset *>(0)); | |
*iter_ = overlap_.release (); | |
// VC++ 6 Hack: | |
equivset_ptr temp_overlap_ (new equivset); | |
overlap_ = temp_overlap_; | |
++iter_; | |
end_ = lhs_->end (); | |
} | |
} | |
if (!r_->empty ()) | |
{ | |
lhs_->push_back (static_cast<equivset *>(0)); | |
lhs_->back () = r_.release (); | |
} | |
} | |
} | |
} | |
static void fill_rhs_list (const node_vector *vector_, | |
const index_set_vector &set_mapping_, equivset_list &list_) | |
{ | |
typename node_vector::const_iterator iter_ = | |
vector_->begin (); | |
typename node_vector::const_iterator end_ = | |
vector_->end (); | |
for (; iter_ != end_; ++iter_) | |
{ | |
const detail::node *node_ = *iter_; | |
if (!node_->end_state ()) | |
{ | |
const std::size_t token_ = node_->token (); | |
if (token_ != null_token) | |
{ | |
list_->push_back (static_cast<equivset *>(0)); | |
if (token_ == bol_token || token_ == eol_token) | |
{ | |
std::set<std::size_t> index_set_; | |
index_set_.insert (token_); | |
list_->back () = new equivset (index_set_, | |
node_->greedy (), token_, node_->followpos ()); | |
} | |
else | |
{ | |
list_->back () = new equivset (set_mapping_[token_], | |
node_->greedy (), token_, node_->followpos ()); | |
} | |
} | |
} | |
} | |
} | |
static void fixup_bol (detail::node * &root_, | |
node_ptr_vector &node_ptr_vector_) | |
{ | |
typename detail::node::node_vector *first_ = &root_->firstpos (); | |
bool found_ = false; | |
typename detail::node::node_vector::const_iterator iter_ = | |
first_->begin (); | |
typename detail::node::node_vector::const_iterator end_ = | |
first_->end (); | |
for (; iter_ != end_; ++iter_) | |
{ | |
const detail::node *node_ = *iter_; | |
found_ = !node_->end_state () && node_->token () == bol_token; | |
if (found_) break; | |
} | |
if (!found_) | |
{ | |
node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0)); | |
node_ptr_vector_->back () = new detail::leaf_node | |
(bol_token, true); | |
detail::node *lhs_ = node_ptr_vector_->back (); | |
node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0)); | |
node_ptr_vector_->back () = new detail::leaf_node | |
(null_token, true); | |
detail::node *rhs_ = node_ptr_vector_->back (); | |
node_ptr_vector_->push_back | |
(static_cast<detail::selection_node *>(0)); | |
node_ptr_vector_->back () = | |
new detail::selection_node (lhs_, rhs_); | |
lhs_ = node_ptr_vector_->back (); | |
node_ptr_vector_->push_back | |
(static_cast<detail::sequence_node *>(0)); | |
node_ptr_vector_->back () = | |
new detail::sequence_node (lhs_, root_); | |
root_ = node_ptr_vector_->back (); | |
} | |
} | |
static void minimise_dfa (const std::size_t dfa_alphabet_, | |
size_t_vector &dfa_, std::size_t size_) | |
{ | |
const std::size_t *first_ = &dfa_.front (); | |
const std::size_t *second_ = 0; | |
const std::size_t *end_ = first_ + size_; | |
std::size_t index_ = 1; | |
std::size_t new_index_ = 1; | |
std::size_t curr_index_ = 0; | |
index_set index_set_; | |
size_t_vector lookup_; | |
std::size_t *lookup_ptr_ = 0; | |
lookup_.resize (size_ / dfa_alphabet_, null_token); | |
lookup_ptr_ = &lookup_.front (); | |
*lookup_ptr_ = 0; | |
// Only one 'jam' state, so skip it. | |
first_ += dfa_alphabet_; | |
for (; first_ < end_; first_ += dfa_alphabet_, ++index_) | |
{ | |
for (second_ = first_ + dfa_alphabet_, curr_index_ = index_ + 1; | |
second_ < end_; second_ += dfa_alphabet_, ++curr_index_) | |
{ | |
if (index_set_.find (curr_index_) != index_set_.end ()) | |
{ | |
continue; | |
} | |
// Some systems have memcmp in namespace std. | |
using namespace std; | |
if (memcmp (first_, second_, sizeof (std::size_t) * | |
dfa_alphabet_) == 0) | |
{ | |
index_set_.insert (curr_index_); | |
lookup_ptr_[curr_index_] = new_index_; | |
} | |
} | |
if (lookup_ptr_[index_] == null_token) | |
{ | |
lookup_ptr_[index_] = new_index_; | |
++new_index_; | |
} | |
} | |
if (!index_set_.empty ()) | |
{ | |
const std::size_t *front_ = &dfa_.front (); | |
size_t_vector new_dfa_ (front_, front_ + dfa_alphabet_); | |
typename index_set::iterator set_end_ = | |
index_set_.end (); | |
const std::size_t *ptr_ = front_ + dfa_alphabet_; | |
std::size_t *new_ptr_ = 0; | |
new_dfa_.resize (size_ - index_set_.size () * dfa_alphabet_, 0); | |
new_ptr_ = &new_dfa_.front () + dfa_alphabet_; | |
size_ /= dfa_alphabet_; | |
for (index_ = 1; index_ < size_; ++index_) | |
{ | |
if (index_set_.find (index_) != set_end_) | |
{ | |
ptr_ += dfa_alphabet_; | |
continue; | |
} | |
new_ptr_[end_state_index] = ptr_[end_state_index]; | |
new_ptr_[id_index] = ptr_[id_index]; | |
new_ptr_[unique_id_index] = ptr_[unique_id_index]; | |
new_ptr_[state_index] = ptr_[state_index]; | |
new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]]; | |
new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]]; | |
new_ptr_ += dfa_offset; | |
ptr_ += dfa_offset; | |
for (std::size_t i_ = dfa_offset; i_ < dfa_alphabet_; ++i_) | |
{ | |
*new_ptr_++ = lookup_ptr_[*ptr_++]; | |
} | |
} | |
dfa_.swap (new_dfa_); | |
} | |
} | |
}; | |
typedef basic_generator<char> generator; | |
typedef basic_generator<wchar_t> wgenerator; | |
} | |
} | |
#endif |