blob: 3871ac926151523d955254b2c228377d28dbf76a [file] [log] [blame]
//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
// From Cormen, Leiserson and Rivest, ch 34.
#pragma once
namespace UnifiedRegex
{
template <typename C>
struct TextbookBoyerMooreSetup : private Chars<C>
{
private:
typedef typename Chars<C>::Char Char;
template <typename C>
friend class TextbookBoyerMoore;
template <typename C>
friend class TextbookBoyerMooreWithLinearMap;
public:
enum Scheme
{
DefaultScheme,
LinearScheme
};
TextbookBoyerMooreSetup(Char const * pat, CharCount patLen) : pat(pat), patLen(patLen) { Init(); }
Scheme GetScheme() const { return scheme; }
static int32 * GetGoodSuffix(ArenaAllocator* allocator, const Char * pat, CharCount patLen, int skip = 1);
private:
void Init();
Scheme scheme;
Char const * const pat;
CharCount const patLen;
uint numLinearChars;
Char linearChar[MaxCharMapLinearChars];
int32 lastOcc[MaxCharMapLinearChars];
};
template <typename C>
class TextbookBoyerMooreWithLinearMap : private Chars<C>
{
template <typename C>
friend struct TextbookBoyerMooreSetup;
typedef typename Chars<C>::Char Char;
private:
typedef CharMap<Char, int32, CharMapScheme_Linear> LastOccMap;
// NOTE: We don't store the actual pattern here since it may be moved between
// constructing the scanner and running it.
LastOccMap lastOccurrence;
int32 *goodSuffix;
public:
inline TextbookBoyerMooreWithLinearMap() : lastOccurrence(-1), goodSuffix(0) {}
// Construct Boyer-Moore tables for pattern pat:
// - pat must be of length patLen * skip
// - if skip is > 1, then each consecutive skip characters of pattern are assumed to represent
// an equivalence class of characters in canonical order (i.e. all chars in a class are represented
// by the same sequence of skip chars)
// - otherwise this is a regular exact-match pattern
void Setup(ArenaAllocator* allocator, TextbookBoyerMooreSetup<C> const& setup);
void FreeBody(ArenaAllocator* allocator, CharCount patLen);
// NOTE: In the following pat and patLen must be the same as passed to Setup above
// For skip = 1
template <uint equivClassSize>
bool Match
( const Char *const input
, const CharCount inputLength
, CharCount& inputOffset
, const Char* pat
, const CharCount patLen
#if ENABLE_REGEX_CONFIG_OPTIONS
, RegexStats* stats
#endif
) const;
#if ENABLE_REGEX_CONFIG_OPTIONS
static char16 const * GetName() { return _u("linear map Boyer-Moore"); }
#endif
};
template <typename C>
class TextbookBoyerMoore : private Chars<C>
{
template <typename C>
friend struct TextbookBoyerMooreSetup;
typedef typename Chars<C>::Char Char;
private:
typedef CharMap<Char, int32> LastOccMap;
// NOTE: We don't store the actual pattern here since it may be moved between
// constructing the scanner and running it.
Field(LastOccMap) lastOccurrence;
Field(int32 *) goodSuffix;
public:
inline TextbookBoyerMoore() : lastOccurrence(-1), goodSuffix(nullptr) {}
// Construct Boyer-Moore tables for pattern pat:
// - pat must be of length patLen * skip
// - if skip is > 1, then each consecutive skip characters of pattern are assumed to represent
// an equivalence class of characters in canonical order (i.e. all chars in a class are represented
// by the same sequence of skip chars)
// - otherwise this is a regular exact-match pattern
void Setup(ArenaAllocator* allocator, TextbookBoyerMooreSetup<C> const& setup);
void Setup(ArenaAllocator * allocator, const Char * pat, CharCount patLen, int skip);
void FreeBody(ArenaAllocator* allocator, CharCount patLen);
// NOTE: In the following pat and patLen must be the same as passed to Setup above
template <uint equivClassSize>
inline bool Match
( const Char *const input
, const CharCount inputLength
, CharCount& inputOffset
, const Char* pat
, const CharCount patLen
#if ENABLE_REGEX_CONFIG_OPTIONS
, RegexStats* stats
#endif
) const
{
return Match<equivClassSize, equivClassSize>(input, inputLength, inputOffset, pat, patLen
#if ENABLE_REGEX_CONFIG_OPTIONS
, stats
#endif
);
}
template <uint equivClassSize, uint lastPatCharEquivClass>
bool Match
( const Char *const input
, const CharCount inputLength
, CharCount& inputOffset
, const Char* pat
, const CharCount patLen
#if ENABLE_REGEX_CONFIG_OPTIONS
, RegexStats* stats
#endif
) const;
#if ENABLE_REGEX_CONFIG_OPTIONS
static char16 const * GetName() { return _u("full map Boyer-Moore"); }
#endif
};
}