| //------------------------------------------------------------------------------------------------------- |
| // Copyright (C) Microsoft. All rights reserved. |
| // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. |
| //------------------------------------------------------------------------------------------------------- |
| #include "ParserPch.h" |
| |
| namespace UnifiedRegex |
| { |
| // ---------------------------------------------------------------------- |
| // CountDomain |
| // ---------------------------------------------------------------------- |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void CountDomain::Print(DebugWriter* w) const |
| { |
| if (upper != CharCountFlag && lower == (CharCount)upper) |
| { |
| w->Print(_u("[%u]"), lower); |
| } |
| else |
| { |
| w->Print(_u("[%u-"), lower); |
| if (upper == CharCountFlag) |
| w->Print(_u("inf]")); |
| else |
| w->Print(_u("%u]"), (CharCount)upper); |
| } |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // Matcher (inlined, called from instruction Exec methods) |
| // ---------------------------------------------------------------------- |
| |
// PUSH: placement-constructs a T (with the given constructor arguments) in the storage returned by
// contStack.Push<T>() and evaluates to the resulting pointer.
| #define PUSH(contStack, T, ...) (new (contStack.Push<T>()) T(__VA_ARGS__)) |
// PUSHA: same as PUSH, but the storage comes from assertionStack.Push().
| #define PUSHA(assertionStack, T, ...) (new (assertionStack.Push()) T(__VA_ARGS__)) |
// L2I: converts a label to a pointer to an instruction of type <O>Inst, passing the tag Inst::InstTag::<O>
// to LabelToInstPointer.
| #define L2I(O, label) LabelToInstPointer<O##Inst>(Inst::InstTag::O, label) |
| |
// Argument lists forwarded verbatim to Fail(...) and HardFail(...); they rely on locals/parameters with
// exactly these names being in scope at the point of use.
| #define FAIL_PARAMETERS input, inputOffset, instPointer, contStack, assertionStack, qcTicks |
| #define HARDFAIL_PARAMETERS(mode) input, inputLength, matchStart, inputOffset, instPointer, contStack, assertionStack, qcTicks, mode |
| |
| // Regex QC heuristics: |
| // - TicksPerQc |
| // - Number of ticks from a previous QC needed to cause another QC. The value affects how often QC will be triggered, so |
| // on slower machines or debug builds, the value needs to be smaller to maintain a reasonable frequency of QCs. |
| // - TicksPerQcTimeCheck |
| // - Number of ticks from a previous QC needed to trigger a time check. Elapsed time from the previous QC is checked to |
| // see if a QC needs to be triggered. The value must be less than TicksPerQc and small enough to reasonably guarantee |
| // a QC every TimePerQc milliseconds without affecting perf. |
| // - TimePerQc |
| // - The target time between QCs |
| |
// TicksPerQc is assembled from preprocessor pieces into a single definition:
//   - when _M_ARM is defined: 1u << 19
//   - otherwise:              1u << 17 when AutoSystemInfo::ShouldQCMoreFrequently() is true, else 1u << 21
//   - DBG builds append ">> 2", i.e. a quarter of the value above
// The terminating ';' sits on its own line so that the optional ">> 2" can be spliced in before it.
| #if defined(_M_ARM) |
| const uint Matcher::TicksPerQc = 1u << 19 |
| #else |
| const uint Matcher::TicksPerQc = 1u << (AutoSystemInfo::ShouldQCMoreFrequently() ? 17 : 21) |
| #endif |
| #if DBG |
| >> 2 |
| #endif |
| ; |
| |
// A time check is considered every quarter of TicksPerQc ticks.
| const uint Matcher::TicksPerQcTimeCheck = Matcher::TicksPerQc >> 2; |
// 50 when AutoSystemInfo::ShouldQCMoreFrequently() is true, otherwise 100.
| const uint Matcher::TimePerQc = AutoSystemInfo::ShouldQCMoreFrequently() ? 50 : 100; // milliseconds |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void Matcher::PushStats(ContStack& contStack, const Char* const input) const |
| { |
| if (stats != 0) |
| { |
| stats->numPushes++; |
| if (contStack.Position() > stats->stackHWM) |
| stats->stackHWM = contStack.Position(); |
| } |
| if (w != 0) |
| { |
| w->Print(_u("PUSH ")); |
| contStack.Top()->Print(w, input); |
| } |
| } |
| |
| void Matcher::PopStats(ContStack& contStack, const Char* const input) const |
| { |
| if (stats != 0) |
| { |
| stats->numPops++; |
| } |
| if (w != 0) |
| { |
| const Cont* top = contStack.Top(); |
| if (top == 0) |
| w->PrintEOL(_u("<empty stack>")); |
| else |
| { |
| w->Print(_u("POP ")); |
| top->Print(w, input); |
| } |
| } |
| } |
| |
| void Matcher::UnPopStats(ContStack& contStack, const Char* const input) const |
| { |
| if (stats != 0) |
| { |
| stats->numPops--; |
| } |
| if (w != 0) |
| { |
| const Cont* top = contStack.Top(); |
| if (top == 0) |
| w->PrintEOL(_u("<empty stack>")); |
| else |
| { |
| w->Print(_u("UNPOP ")); |
| top->Print(w, input); |
| } |
| } |
| } |
| |
| void Matcher::CompStats() const |
| { |
| if (stats != 0) |
| { |
| stats->numCompares++; |
| } |
| } |
| |
| void Matcher::InstStats() const |
| { |
| if (stats != 0) |
| { |
| stats->numInsts++; |
| } |
| } |
| #endif |
| |
| inline void Matcher::QueryContinue(uint &qcTicks) |
| { |
| // See definition of TimePerQc for description of regex QC heuristics |
| |
| Assert(!(TicksPerQc & TicksPerQc - 1)); // must be a power of 2 |
| Assert(!(TicksPerQcTimeCheck & TicksPerQcTimeCheck - 1)); // must be a power of 2 |
| Assert(TicksPerQcTimeCheck < TicksPerQc); |
| |
| if (PHASE_OFF1(Js::RegexQcPhase)) |
| { |
| return; |
| } |
| if (++qcTicks & TicksPerQcTimeCheck - 1) |
| { |
| return; |
| } |
| DoQueryContinue(qcTicks); |
| } |
| |
| inline bool Matcher::HardFail( |
| const Char* const input |
| , const CharCount inputLength |
| , CharCount &matchStart |
| , CharCount &inputOffset |
| , const uint8 *&instPointer |
| , ContStack &contStack |
| , AssertionStack &assertionStack |
| , uint &qcTicks |
| , HardFailMode mode) |
| { |
| switch (mode) |
| { |
| case HardFailMode::BacktrackAndLater: |
| return Fail(FAIL_PARAMETERS); |
| case HardFailMode::BacktrackOnly: |
| if (Fail(FAIL_PARAMETERS)) |
| { |
| // No use trying any more start positions |
| matchStart = inputLength; |
| return true; // STOP EXECUTING |
| } |
| else |
| { |
| return false; |
| } |
| case HardFailMode::LaterOnly: |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| if (w != 0) |
| { |
| w->PrintEOL(_u("CLEAR")); |
| } |
| #endif |
| contStack.Clear(); |
| assertionStack.Clear(); |
| return true; // STOP EXECUTING |
| case HardFailMode::ImmediateFail: |
| // No use trying any more start positions |
| matchStart = inputLength; |
| return true; // STOP EXECUTING |
| default: |
| Assume(false); |
| } |
| |
| return true; |
| } |
| |
| inline bool Matcher::PopAssertion(CharCount &inputOffset, const uint8 *&instPointer, ContStack &contStack, AssertionStack &assertionStack, bool succeeded) |
| { |
| AssertionInfo* info = assertionStack.Top(); |
| Assert(info != 0); |
| assertionStack.Pop(); |
| BeginAssertionInst* begin = L2I(BeginAssertion, info->beginLabel); |
| |
| // Cut the existing continuations (we never backtrack into an assertion) |
| // NOTE: We don't include the effective pops in the stats |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| if (w != 0) |
| { |
| w->PrintEOL(_u("POP TO %llu"), (unsigned long long)info->contStackPosition); |
| } |
| #endif |
| contStack.PopTo(info->contStackPosition); |
| |
| // succeeded isNegation action |
| // --------- ---------- ---------------------------------------------------------------------------------- |
| // false false Fail into outer continuations (inner group bindings will have been undone) |
| // true false Jump to next label (inner group bindings are now frozen) |
| // false true Jump to next label (inner group bindings will have been undone and are now frozen) |
| // true true Fail into outer continuations (inner group binding MUST BE CLEARED) |
| |
| if (succeeded && begin->isNegation) |
| { |
| ResetInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId); |
| } |
| |
| if (succeeded == begin->isNegation) |
| { |
| // Assertion failed |
| return false; |
| } |
| else |
| { |
| // Continue with next label but from original input position |
| inputOffset = info->startInputOffset; |
| instPointer = LabelToInstPointer(begin->nextLabel); |
| |
| return true; |
| } |
| } |
| |
| inline void Matcher::SaveInnerGroups( |
| const int fromGroupId, |
| const int toGroupId, |
| const bool reset, |
| const Char *const input, |
| ContStack &contStack) |
| { |
| if (toGroupId >= 0) |
| { |
| DoSaveInnerGroups(fromGroupId, toGroupId, reset, input, contStack); |
| } |
| } |
| |
| void Matcher::DoSaveInnerGroups( |
| const int fromGroupId, |
| const int toGroupId, |
| const bool reset, |
| const Char *const input, |
| ContStack &contStack) |
| { |
| Assert(fromGroupId >= 0); |
| Assert(toGroupId >= 0); |
| Assert(fromGroupId <= toGroupId); |
| |
| int undefinedRangeFromId = -1; |
| int groupId = fromGroupId; |
| do |
| { |
| GroupInfo *const groupInfo = GroupIdToGroupInfo(groupId); |
| if (groupInfo->IsUndefined()) |
| { |
| if (undefinedRangeFromId < 0) |
| { |
| undefinedRangeFromId = groupId; |
| } |
| continue; |
| } |
| |
| if (undefinedRangeFromId >= 0) |
| { |
| Assert(groupId > 0); |
| DoSaveInnerGroups_AllUndefined(undefinedRangeFromId, groupId - 1, input, contStack); |
| undefinedRangeFromId = -1; |
| } |
| |
| PUSH(contStack, RestoreGroupCont, groupId, *groupInfo); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| PushStats(contStack, input); |
| #endif |
| |
| if (reset) |
| { |
| groupInfo->Reset(); |
| } |
| } while (++groupId <= toGroupId); |
| if (undefinedRangeFromId >= 0) |
| { |
| Assert(toGroupId >= 0); |
| DoSaveInnerGroups_AllUndefined(undefinedRangeFromId, toGroupId, input, contStack); |
| } |
| } |
| |
| inline void Matcher::SaveInnerGroups_AllUndefined( |
| const int fromGroupId, |
| const int toGroupId, |
| const Char *const input, |
| ContStack &contStack) |
| { |
| if (toGroupId >= 0) |
| { |
| DoSaveInnerGroups_AllUndefined(fromGroupId, toGroupId, input, contStack); |
| } |
| } |
| |
| void Matcher::DoSaveInnerGroups_AllUndefined( |
| const int fromGroupId, |
| const int toGroupId, |
| const Char *const input, |
| ContStack &contStack) |
| { |
| Assert(fromGroupId >= 0); |
| Assert(toGroupId >= 0); |
| Assert(fromGroupId <= toGroupId); |
| |
| #if DBG |
| for (int groupId = fromGroupId; groupId <= toGroupId; ++groupId) |
| { |
| Assert(GroupIdToGroupInfo(groupId)->IsUndefined()); |
| } |
| #endif |
| |
| if (fromGroupId == toGroupId) |
| { |
| PUSH(contStack, ResetGroupCont, fromGroupId); |
| } |
| else |
| { |
| PUSH(contStack, ResetGroupRangeCont, fromGroupId, toGroupId); |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| PushStats(contStack, input); |
| #endif |
| } |
| |
| inline void Matcher::ResetGroup(int groupId) |
| { |
| GroupInfo* info = GroupIdToGroupInfo(groupId); |
| info->Reset(); |
| } |
| |
| inline void Matcher::ResetInnerGroups(int minGroupId, int maxGroupId) |
| { |
| for (int i = minGroupId; i <= maxGroupId; i++) |
| { |
| ResetGroup(i); |
| } |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| bool Inst::IsBaselineMode() |
| { |
| return Js::Configuration::Global.flags.BaselineMode; |
| } |
| |
| Label Inst::GetPrintLabel(Label label) |
| { |
| return IsBaselineMode() ? (Label)0xFFFF : label; |
| } |
| |
| template <typename T> |
| void Inst::PrintBytes(DebugWriter *w, Inst *inst, T *that, const char16 *annotation) const |
| { |
| T *start = (T*)that; |
| byte *startByte = (byte *)start; |
| byte *baseByte = (byte *)inst; |
| ptrdiff_t offset = startByte - baseByte; |
| size_t size = sizeof(*((T *)that)); |
| byte *endByte = startByte + size; |
| byte *currentByte = startByte; |
| w->Print(_u("0x%p[+0x%03x](0x%03x) [%s]:"), startByte, offset, size, annotation); |
| |
| for (; currentByte < endByte; ++currentByte) |
| { |
| if ((currentByte - endByte) % 4 == 0) |
| { |
| w->Print(_u(" "), *currentByte); |
| } |
| w->Print(_u("%02x"), *currentByte); |
| } |
| w->PrintEOL(_u("")); |
| } |
| |
| template <> |
| void Inst::PrintBytes(DebugWriter *w, Inst *inst, Inst *that, const char16 *annotation) const |
| { |
| Inst *start = (Inst *)that; |
| |
| size_t baseSize = sizeof(*(Inst *)that); |
| ptrdiff_t offsetToData = (byte *)&(start->tag) - ((byte *)start); |
| size_t size = baseSize - offsetToData; |
| |
| byte *startByte = (byte *)(&(start->tag)); |
| byte *endByte = startByte + size; |
| byte *currentByte = startByte; |
| w->Print(_u("0x%p[+0x%03x](0x%03x) [%s]:"), startByte, offsetToData, size, annotation); |
| for (; currentByte < endByte; ++currentByte) |
| { |
| if ((currentByte - endByte) % 4 == 0) |
| { |
| w->Print(_u(" "), *currentByte); |
| } |
| w->Print(_u("%02x"), *currentByte); |
| } |
| w->PrintEOL(_u("")); |
| } |
| |
// Helper macros for the instruction Print methods. They expect `w` (DebugWriter*) to be in scope, and the
// PRINT_MIXIN* macros additionally expect `litbuf`.

// Hex-dumps the InstType part of *this, annotated with the stringized type name.
| #define PRINT_BYTES(InstType) \ |
| Inst::PrintBytes<InstType>(w, (Inst *)this, (InstType *)this, _u(#InstType)) |
| |
// Same as PRINT_BYTES, but with a caller-supplied annotation string.
| #define PRINT_BYTES_ANNOTATED(InstType, Annotation) \ |
| Inst::PrintBytes<InstType>(w, (Inst *)this, (InstType *)this, (Annotation)) |
| |
// Calls the Print method of the given mixin on *this.
| #define PRINT_MIXIN(Mixin) \ |
| ((Mixin *)this)->Print(w, litbuf) |
| |
// Same as PRINT_MIXIN, forwarding extra arguments to the mixin's Print.
| #define PRINT_MIXIN_ARGS(Mixin, ...) \ |
| ((Mixin *)this)->Print(w, litbuf, __VA_ARGS__) |
| |
// PRINT_MIXIN followed by a ", " separator. The expansion already ends in ';', so a ';' written after
// the macro invocation produces an extra empty statement.
| #define PRINT_MIXIN_COMMA(Mixin) \ |
| PRINT_MIXIN(Mixin); \ |
| w->Print(_u(", ")); |
| |
// The three macros below bracket the body of an instruction's Print method and must be used together,
// in this order. They expect `w` (DebugWriter*) and `label` to be in scope.

// Prints "L<label>: ", then (only with the RegexBytecodeDebug flag) the instruction size, then the
// instruction name and an opening parenthesis. sizeof yields a size_t, which is narrowed explicitly
// because %x consumes an unsigned int.
#define PRINT_RE_BYTECODE_BEGIN(Name) \
    w->Print(_u("L%04x: "), label); \
    if (REGEX_CONFIG_FLAG(RegexBytecodeDebug)) \
    { \
        w->Print(_u("(0x%03x bytes) "), (uint)sizeof(*this)); \
    } \
    w->Print(_u(Name)); \
    w->Print(_u("("));

// Closes the operand list and OPENS a block that only runs with the RegexBytecodeDebug flag; the block
// starts by indenting and dumping the bytes of the Inst base. PRINT_RE_BYTECODE_END closes the block.
#define PRINT_RE_BYTECODE_MID() \
    w->PrintEOL(_u(")")); \
    if (REGEX_CONFIG_FLAG(RegexBytecodeDebug)) \
    { \
        w->Indent(); \
        PRINT_BYTES(Inst);

// Closes the block opened by PRINT_RE_BYTECODE_MID and returns the size of the instruction.
#define PRINT_RE_BYTECODE_END() \
        w->Unindent(); \
    } \
    return sizeof(*this);
| |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // Mixins |
| // ---------------------------------------------------------------------- |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void BackupMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("backup: ")); |
| backup.Print(w); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void CharMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("c: ")); |
| w->PrintQuotedChar(c); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void Char2Mixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("c0: ")); |
| w->PrintQuotedChar(cs[0]); |
| w->Print(_u(", c1: ")); |
| w->PrintQuotedChar(cs[1]); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void Char3Mixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("c0: ")); |
| w->PrintQuotedChar(cs[0]); |
| w->Print(_u(", c1: ")); |
| w->PrintQuotedChar(cs[1]); |
| w->Print(_u(", c2: ")); |
| w->PrintQuotedChar(cs[2]); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void Char4Mixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("c0: ")); |
| w->PrintQuotedChar(cs[0]); |
| w->Print(_u(", c1: ")); |
| w->PrintQuotedChar(cs[1]); |
| w->Print(_u(", c2: ")); |
| w->PrintQuotedChar(cs[2]); |
| w->Print(_u(", c3: ")); |
| w->PrintQuotedChar(cs[3]); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void LiteralMixin::Print(DebugWriter* w, const char16* litbuf, bool isEquivClass) const |
| { |
| if (isEquivClass) |
| { |
| w->Print(_u("equivLiterals: ")); |
| for (int i = 0; i < CaseInsensitive::EquivClassSize; i++) |
| { |
| if (i > 0) |
| { |
| w->Print(_u(", ")); |
| } |
| w->Print(_u("\"")); |
| for (CharCount j = 0; j < length; j++) |
| { |
| w->PrintEscapedChar(litbuf[offset + j * CaseInsensitive::EquivClassSize + i]); |
| } |
| w->Print(_u("\"")); |
| } |
| } |
| else |
| { |
| w->Print(_u("literal: ")); |
| w->PrintQuotedString(litbuf + offset, length); |
| } |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // Char2LiteralScannerMixin |
| // ---------------------------------------------------------------------- |
| |
// Scans `input` forward from `inputOffset` for the first position at which cs[0] is immediately followed
// by cs[1].
//   matcher     - only used to record comparison statistics (when ENABLE_REGEX_CONFIG_OPTIONS is on)
//   input       - the input characters
//   inputLength - number of characters in `input`
//   inputOffset - in: position to start scanning from; out: position of the match (written only on success)
// Returns true when the two-character literal was found, false otherwise.
| bool Char2LiteralScannerMixin::Match(Matcher& matcher, const char16* const input, const CharCount inputLength, CharCount& inputOffset) const |
| { |
| if (inputLength == 0) |
| { |
| return false; |
| } |
| |
| const uint matchC0 = Chars<char16>::CTU(cs[0]); |
| const uint matchC1 = Chars<char16>::CTU(cs[1]); |
| |
// endInput points at the last character, so currentInput[1] is readable whenever currentInput < endInput
| const char16 * currentInput = input + inputOffset; |
| const char16 * endInput = input + inputLength - 1; |
| |
| while (currentInput < endInput) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
// Fast loop: look at the second character of the two-character window and advance two characters at a time
| while (true) |
| { |
| const uint c1 = Chars<char16>::CTU(currentInput[1]); |
| if (c1 != matchC1) |
| { |
// The window's second character could be the start of a match: switch to the single-step loop below
| if (c1 == matchC0) |
| { |
| break; |
| } |
| currentInput += 2; |
| if (currentInput >= endInput) |
| { |
| return false; |
| } |
| continue; |
| } |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| // Check the first character |
| const uint c0 = Chars<char16>::CTU(*currentInput); |
| if (c0 == matchC0) |
| { |
| inputOffset = (CharCount)(currentInput - input); |
| return true; |
| } |
// Here c1 == matchC1, so when both pattern characters are equal the window's second character
// may itself be the start of a match
| if (matchC0 == matchC1) |
| { |
| break; |
| } |
| currentInput +=2; |
| if (currentInput >= endInput) |
| { |
| return false; |
| } |
| } |
| |
| // If the second character in the buffer matches the first in the pattern, continue |
| // to see if the next character has the second in the pattern |
| currentInput++; |
| while (currentInput < endInput) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| const uint c1 = Chars<char16>::CTU(currentInput[1]); |
| if (c1 == matchC1) |
| { |
| inputOffset = (CharCount)(currentInput - input); |
| return true; |
| } |
// Neither pattern character follows: skip past both and go back to the fast loop
| if (c1 != matchC0) |
| { |
| currentInput += 2; |
| break; |
| } |
| currentInput++; |
| } |
| } |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void Char2LiteralScannerMixin::Print(DebugWriter* w, const char16 * litbuf) const |
| { |
| Char2Mixin::Print(w, litbuf); |
| w->Print(_u(" (with two character literal scanner)")); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ScannerMixinT |
| // ---------------------------------------------------------------------- |
| |
| template <typename ScannerT> |
| void ScannerMixinT<ScannerT>::FreeBody(ArenaAllocator* rtAllocator) |
| { |
| scanner.FreeBody(rtAllocator, length); |
| } |
| |
| template <typename ScannerT> |
| inline bool |
| ScannerMixinT<ScannerT>::Match(Matcher& matcher, const char16 * const input, const CharCount inputLength, CharCount& inputOffset) const |
| { |
| Assert(length <= matcher.program->rep.insts.litbufLen - offset); |
| return scanner.template Match<1>( |
| input |
| , inputLength |
| , inputOffset |
| , matcher.program->rep.insts.litbuf + offset |
| , length |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| , matcher.stats |
| #endif |
| ); |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| template <typename ScannerT> |
| void ScannerMixinT<ScannerT>::Print(DebugWriter* w, const char16* litbuf, bool isEquivClass) const |
| { |
| LiteralMixin::Print(w, litbuf, isEquivClass); |
| w->Print(_u(" (with %s scanner)"), ScannerT::GetName()); |
| } |
| #endif |
| |
// The ScannerMixinT member templates are defined in this file, so the two scanner flavours are
// instantiated explicitly here.
| // explicit instantiation |
| template struct ScannerMixinT<TextbookBoyerMoore<char16>>; |
| template struct ScannerMixinT<TextbookBoyerMooreWithLinearMap<char16>>; |
| |
| // ---------------------------------------------------------------------- |
| // EquivScannerMixinT |
| // ---------------------------------------------------------------------- |
| |
| template <uint lastPatCharEquivClassSize> |
| inline bool EquivScannerMixinT<lastPatCharEquivClassSize>::Match(Matcher& matcher, const char16* const input, const CharCount inputLength, CharCount& inputOffset) const |
| { |
| Assert(length * CaseInsensitive::EquivClassSize <= matcher.program->rep.insts.litbufLen - offset); |
| CompileAssert(lastPatCharEquivClassSize >= 1 && lastPatCharEquivClassSize <= CaseInsensitive::EquivClassSize); |
| return scanner.Match<CaseInsensitive::EquivClassSize, lastPatCharEquivClassSize>( |
| input |
| , inputLength |
| , inputOffset |
| , matcher.program->rep.insts.litbuf + offset |
| , length |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| , matcher.stats |
| #endif |
| ); |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| template <uint lastPatCharEquivClassSize> |
| void EquivScannerMixinT<lastPatCharEquivClassSize>::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| __super::Print(w, litbuf, true); |
| w->Print(_u(" (last char equiv size:%d)"), lastPatCharEquivClassSize); |
| } |
| |
| // explicit instantiation |
| template struct EquivScannerMixinT<1>; |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ScannerInfo |
| // ---------------------------------------------------------------------- |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void ScannerInfo::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| ScannerMixin::Print(w, litbuf, isEquivClass); |
| } |
| #endif |
| |
| ScannerInfo* ScannersMixin::Add(Recycler *recycler, Program *program, CharCount offset, CharCount length, bool isEquivClass) |
| { |
| Assert(numLiterals < MaxNumSyncLiterals); |
| return program->AddScannerForSyncToLiterals(recycler, numLiterals++, offset, length, isEquivClass); |
| } |
| |
| void ScannersMixin::FreeBody(ArenaAllocator* rtAllocator) |
| { |
| for (int i = 0; i < numLiterals; i++) |
| { |
| infos[i]->FreeBody(rtAllocator); |
| #if DBG |
| infos[i] = nullptr; |
| #endif |
| } |
| #if DBG |
| numLiterals = 0; |
| #endif |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void ScannersMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("literals: {")); |
| for (int i = 0; i < numLiterals; i++) |
| { |
| if (i > 0) |
| { |
| w->Print(_u(", ")); |
| } |
| infos[i]->Print(w, litbuf); |
| } |
| w->Print(_u("}")); |
| } |
| #endif |
| |
| template<bool IsNegation> |
| void SetMixin<IsNegation>::FreeBody(ArenaAllocator* rtAllocator) |
| { |
| set.FreeBody(rtAllocator); |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| template<bool IsNegation> |
| void SetMixin<IsNegation>::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("set: ")); |
| if (IsNegation) |
| { |
| w->Print(_u("not ")); |
| } |
| set.Print(w); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void TrieMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->PrintEOL(_u("")); |
| trie.Print(w); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void GroupMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("groupId: %d"), groupId); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void ChompBoundedMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("repeats: ")); |
| repeats.Print(w); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void JumpMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("targetLabel: L%04x"), Inst::GetPrintLabel(targetLabel)); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void BodyGroupsMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("minBodyGroupId: %d, maxBodyGroupId: %d"), minBodyGroupId, maxBodyGroupId); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void BeginLoopBasicsMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("loopId: %d, repeats: "), loopId); |
| repeats.Print(w); |
| w->Print(_u(", hasOuterLoops: %s"), hasOuterLoops ? _u("true") : _u("false")); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void BeginLoopMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| BeginLoopBasicsMixin::Print(w, litbuf); |
| w->Print(_u(", hasInnerNondet: %s, exitLabel: L%04x, "), |
| hasInnerNondet ? _u("true") : _u("false"), Inst::GetPrintLabel(exitLabel)); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void GreedyMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("greedy: %s"), isGreedy ? _u("true") : _u("false")); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void RepeatLoopMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("beginLabel: L%04x"), Inst::GetPrintLabel(beginLabel)); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void GreedyLoopNoBacktrackMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("loopId: %d, exitLabel: L%04x"), loopId, exitLabel); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void TryMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("failLabel: L%04x"), Inst::GetPrintLabel(failLabel)); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void NegationMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("isNegation: %s"), isNegation ? _u("true") : _u("false")); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void NextLabelMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("nextLabel: L%04x"), Inst::GetPrintLabel(nextLabel)); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void FixedLengthMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("length: %u"), length); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void FollowFirstMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("followFirst: %c"), followFirst); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void NoNeedToSaveMixin::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->Print(_u("noNeedToSave: %s"), noNeedToSave ? _u("true") : _u("false")); |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void SwitchCase::Print(DebugWriter* w) const |
| { |
| w->Print(_u("case ")); |
| w->PrintQuotedChar(c); |
| w->PrintEOL(_u(": Jump(L%04x)"), targetLabel); |
| } |
| #endif |
| |
| template <uint8 n> |
| void SwitchMixin<n>::AddCase(char16 c, Label targetLabel) |
| { |
| AnalysisAssert(numCases < MaxCases); |
| uint8 i; |
| for (i = 0; i < numCases; i++) |
| { |
| Assert(cases[i].c != c); |
| if (cases[i].c > c) |
| { |
| break; |
| } |
| } |
| __analysis_assume(numCases < MaxCases); |
| for (uint8 j = numCases; j > i; j--) |
| { |
| cases[j] = cases[j - 1]; |
| } |
| cases[i].c = c; |
| cases[i].targetLabel = targetLabel; |
| numCases++; |
| } |
| |
// Exists only to make the compiler instantiate SwitchMixin<n>::AddCase (and Print, when
// ENABLE_REGEX_CONFIG_OPTIONS is on) for n = 2, 4, 8, 16 and 24, since those member templates are
// defined in this file.
// NOTE(review): presumably never meant to be executed - the Print call passes null for both the
// writer and the literal buffer - confirm that there are no callers.
| void UnifiedRegexSwitchMixinForceAllInstantiations() |
| { |
// Print only exists when regex config options are enabled
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| #define SWITCH_FORCE_INSTANTIATION_PRINT x.Print(0, 0) |
| #else |
| #define SWITCH_FORCE_INSTANTIATION_PRINT |
| #endif |
| |
// One scoped SwitchMixin<n> per size, referencing each member that needs instantiating
| #define SWITCH_FORCE_INSTANTIATION(n) \ |
| { \ |
| SwitchMixin<n> x; \ |
| x.AddCase(0, 0); \ |
| SWITCH_FORCE_INSTANTIATION_PRINT; \ |
| } |
| |
| SWITCH_FORCE_INSTANTIATION(2); |
| SWITCH_FORCE_INSTANTIATION(4); |
| SWITCH_FORCE_INSTANTIATION(8); |
| SWITCH_FORCE_INSTANTIATION(16); |
| SWITCH_FORCE_INSTANTIATION(24); |
| |
// The helper macros are local to this function
| #undef SWITCH_FORCE_INSTANTIATION_PRINT |
| #undef SWITCH_FORCE_INSTANTIATION |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| template <uint8 n> |
| void SwitchMixin<n>::Print(DebugWriter* w, const char16* litbuf) const |
| { |
| w->EOL(); |
| w->Indent(); |
| for (uint8 i = 0; i < numCases; i++) |
| { |
| cases[i].Print(w); |
| } |
| w->Unindent(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // NopInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool NopInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| return false; // don't stop execution |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int NopInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("Nop"); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(NopInst); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // FailInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool FailInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int FailInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("Fail"); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(NopInst); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // SuccInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool SuccInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| GroupInfo* info = matcher.GroupIdToGroupInfo(0); |
| info->offset = matchStart; |
| info->length = inputOffset - matchStart; |
| return true; // STOP MATCHING |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int SuccInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("Succ"); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(NopInst); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // JumpInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool JumpInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| instPointer = matcher.LabelToInstPointer(targetLabel); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int JumpInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("Jump"); |
| PRINT_MIXIN(JumpMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(JumpMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // JumpIfNotCharInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool JumpIfNotCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset < inputLength && input[inputOffset] == c) |
| { |
| instPointer += sizeof(*this); |
| } |
| else |
| { |
| instPointer = matcher.LabelToInstPointer(targetLabel); |
| } |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int JumpIfNotCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("JumpIfNotChar"); |
| PRINT_MIXIN_COMMA(CharMixin); |
| PRINT_MIXIN(JumpMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(CharMixin); |
| PRINT_BYTES(JumpMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // MatchCharOrJumpInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool MatchCharOrJumpInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset < inputLength && input[inputOffset] == c) |
| { |
| inputOffset++; |
| instPointer += sizeof(*this); |
| } |
| else |
| { |
| instPointer = matcher.LabelToInstPointer(targetLabel); |
| } |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int MatchCharOrJumpInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("MatchCharOrJump"); |
| PRINT_MIXIN_COMMA(CharMixin); |
| PRINT_MIXIN(JumpMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(CharMixin); |
| PRINT_BYTES(JumpMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // JumpIfNotSetInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool JumpIfNotSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset < inputLength && set.Get(input[inputOffset])) |
| { |
| instPointer += sizeof(*this); |
| } |
| else |
| { |
| instPointer = matcher.LabelToInstPointer(targetLabel); |
| } |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int JumpIfNotSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("JumpIfNotSet"); |
| PRINT_MIXIN_COMMA(SetMixin<false>); |
| PRINT_MIXIN(JumpMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(SetMixin<false>); |
| PRINT_BYTES(JumpMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // MatchSetOrJumpInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool MatchSetOrJumpInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset < inputLength && set.Get(input[inputOffset])) |
| { |
| inputOffset++; |
| instPointer += sizeof(*this); |
| } |
| else |
| { |
| instPointer = matcher.LabelToInstPointer(targetLabel); |
| } |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "MatchSetOrJump" opcode name
// followed by the SetMixin<false> and JumpMixin parts via the PRINT_* macros.
int MatchSetOrJumpInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("MatchSetOrJump");
    PRINT_MIXIN_COMMA(SetMixin<false>);
    PRINT_MIXIN(JumpMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(SetMixin<false>);
    PRINT_BYTES(JumpMixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // Switch(AndConsume)Inst (optimized instructions) |
| // ---------------------------------------------------------------------- |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| #define COMP_STATS matcher.CompStats() |
| #define SwitchAndConsumeInstPrintImpl(BaseName, n) \ |
| int BaseName##n##Inst::Print(DebugWriter* w, Label label, const Char* litbuf) const \ |
| { \ |
| PRINT_RE_BYTECODE_BEGIN("SwitchAndConsume"#n); \ |
| PRINT_MIXIN(SwitchMixin<n>); \ |
| PRINT_RE_BYTECODE_MID(); \ |
| PRINT_BYTES(SwitchMixin<n>); \ |
| PRINT_RE_BYTECODE_END(); \ |
| } |
| #else |
| #define COMP_STATS |
| #define SwitchAndConsumeInstPrintImpl(BaseName, n) |
| #endif |
| |
| #define SwitchAndConsumeInstImpl(BaseName, n) \ |
| inline bool BaseName##n##Inst::Exec(REGEX_INST_EXEC_PARAMETERS) const \ |
| { \ |
| if (inputOffset >= inputLength) \ |
| { \ |
| return matcher.Fail(FAIL_PARAMETERS); \ |
| } \ |
| \ |
| const uint8 localNumCases = numCases; \ |
| for (int i = 0; i < localNumCases; i++) \ |
| { \ |
| COMP_STATS; \ |
| if (cases[i].c == input[inputOffset]) \ |
| { \ |
| CONSUME; \ |
| instPointer = matcher.LabelToInstPointer(cases[i].targetLabel); \ |
| return false; \ |
| } \ |
| else if (cases[i].c > input[inputOffset]) \ |
| { \ |
| break; \ |
| } \ |
| } \ |
| \ |
| instPointer += sizeof(*this); \ |
| return false; \ |
| } \ |
| SwitchAndConsumeInstPrintImpl(BaseName, n); |
| |
| #define CONSUME |
| SwitchAndConsumeInstImpl(Switch, 2); |
| SwitchAndConsumeInstImpl(Switch, 4); |
| SwitchAndConsumeInstImpl(Switch, 8); |
| SwitchAndConsumeInstImpl(Switch, 16); |
| SwitchAndConsumeInstImpl(Switch, 24); |
| #undef CONSUME |
| |
| #define CONSUME inputOffset++ |
| SwitchAndConsumeInstImpl(SwitchAndConsume, 2); |
| SwitchAndConsumeInstImpl(SwitchAndConsume, 4); |
| SwitchAndConsumeInstImpl(SwitchAndConsume, 8); |
| SwitchAndConsumeInstImpl(SwitchAndConsume, 16); |
| SwitchAndConsumeInstImpl(SwitchAndConsume, 24); |
| #undef CONSUME |
| |
| #undef COMP_STATS |
| #undef SwitchAndConsumeInstPrintImpl |
| #undef SwitchAndConsumeInstImpl |
| |
| // ---------------------------------------------------------------------- |
| // BOITestInst |
| // ---------------------------------------------------------------------- |
| |
| template <> |
| BOITestInst<true>::BOITestInst() : Inst(InstTag::BOIHardFailTest) {} |
| template <> |
| BOITestInst<false>::BOITestInst() : Inst(InstTag::BOITest) {} |
| |
| template <bool canHardFail> |
| inline bool BOITestInst<canHardFail>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (inputOffset > 0) |
| { |
| if (canHardFail) |
| { |
| // Clearly trying to start from later in the input won't help, and we know backtracking can't take us earlier in the input |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| else |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| } |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits "BOIHardFailTest" or "BOITest"
// depending on canHardFail, followed by the value of the hardFail flag.
template <bool canHardFail>
int BOITestInst<canHardFail>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    if (!canHardFail)
    {
        PRINT_RE_BYTECODE_BEGIN("BOITest");
    }
    else
    {
        PRINT_RE_BYTECODE_BEGIN("BOIHardFailTest");
    }

    w->Print(_u("<hardFail>: %s"), canHardFail ? _u("true") : _u("false"));

    PRINT_RE_BYTECODE_MID();
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // EOITestInst |
| // ---------------------------------------------------------------------- |
| |
| template <> |
| EOITestInst<true>::EOITestInst() : Inst(InstTag::EOIHardFailTest) {} |
| template <> |
| EOITestInst<false>::EOITestInst() : Inst(InstTag::EOITest) {} |
| |
| template <bool canHardFail> |
| inline bool EOITestInst<canHardFail>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (inputOffset < inputLength) |
| { |
| if (canHardFail) |
| { |
| // We know backtracking can never take us later in the input, but starting from later in the input could help |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::LaterOnly)); |
| } |
| else |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| } |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits "EOIHardFailTest" or "EOITest"
// depending on canHardFail, followed by the value of the hardFail flag.
template <bool canHardFail>
int EOITestInst<canHardFail>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    if (!canHardFail)
    {
        PRINT_RE_BYTECODE_BEGIN("EOITest");
    }
    else
    {
        PRINT_RE_BYTECODE_BEGIN("EOIHardFailTest");
    }

    w->Print(_u("<hardFail>: %s"), canHardFail ? _u("true") : _u("false"));

    PRINT_RE_BYTECODE_MID();
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // BOLTestInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool BOLTestInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset > 0 && !matcher.standardChars->IsNewline(input[inputOffset - 1])) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "BOLTest" opcode name; the
// instruction has no mixin operands to print.
int BOLTestInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("BOLTest");
    PRINT_RE_BYTECODE_MID();
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // EOLTestInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool EOLTestInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset < inputLength && !matcher.standardChars->IsNewline(input[inputOffset])) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "EOLTest" opcode name; the
// instruction has no mixin operands to print.
int EOLTestInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("EOLTest");
    PRINT_RE_BYTECODE_MID();
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // WordBoundaryTestInst |
| // ---------------------------------------------------------------------- |
| |
| template <> |
| WordBoundaryTestInst<true>::WordBoundaryTestInst() : Inst(InstTag::NegatedWordBoundaryTest) {} |
| template <> |
| WordBoundaryTestInst<false>::WordBoundaryTestInst() : Inst(InstTag::WordBoundaryTest) {} |
| |
| template <bool isNegation> |
| inline bool WordBoundaryTestInst<isNegation>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| const bool prev = inputOffset > 0 && matcher.standardChars->IsWord(input[inputOffset - 1]); |
| const bool curr = inputOffset < inputLength && matcher.standardChars->IsWord(input[inputOffset]); |
| if (isNegation == (prev != curr)) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits "NegatedWordBoundaryTest" or
// "WordBoundaryTest" depending on isNegation.
template <bool isNegation>
int WordBoundaryTestInst<isNegation>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    if (!isNegation)
    {
        PRINT_RE_BYTECODE_BEGIN("WordBoundaryTest");
    }
    else
    {
        PRINT_RE_BYTECODE_BEGIN("NegatedWordBoundaryTest");
    }

    PRINT_RE_BYTECODE_MID();
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // MatchCharInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool MatchCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset >= inputLength || input[inputOffset] != c) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| inputOffset++; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "MatchChar" opcode name and its
// CharMixin part via the PRINT_* macros.
int MatchCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("MatchChar");
    PRINT_MIXIN(CharMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(CharMixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // MatchChar2Inst |
| // ---------------------------------------------------------------------- |
| |
| inline bool MatchChar2Inst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset >= inputLength || (input[inputOffset] != cs[0] && input[inputOffset] != cs[1])) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| inputOffset++; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "MatchChar2" opcode name and its
// Char2Mixin part via the PRINT_* macros.
int MatchChar2Inst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("MatchChar2");
    PRINT_MIXIN(Char2Mixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(Char2Mixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // MatchChar3Inst |
| // ---------------------------------------------------------------------- |
| |
| inline bool MatchChar3Inst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset >= inputLength || (input[inputOffset] != cs[0] && input[inputOffset] != cs[1] && input[inputOffset] != cs[2])) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| inputOffset++; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "MatchChar3" opcode name and its
// Char3Mixin part via the PRINT_* macros.
int MatchChar3Inst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("MatchChar3");
    PRINT_MIXIN(Char3Mixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(Char3Mixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // MatchChar4Inst |
| // ---------------------------------------------------------------------- |
| |
| inline bool MatchChar4Inst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset >= inputLength || (input[inputOffset] != cs[0] && input[inputOffset] != cs[1] && input[inputOffset] != cs[2] && input[inputOffset] != cs[3])) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| inputOffset++; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "MatchChar4" opcode name and its
// Char4Mixin part via the PRINT_* macros.
int MatchChar4Inst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("MatchChar4");
    PRINT_MIXIN(Char4Mixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(Char4Mixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // MatchSetInst |
| // ---------------------------------------------------------------------- |
| |
| template<bool IsNegation> |
| inline bool MatchSetInst<IsNegation>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset >= inputLength || this->set.Get(input[inputOffset]) == IsNegation) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| inputOffset++; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits "MatchNegatedSet" or "MatchSet" with
// the matching SetMixin<true/false> part, selected by IsNegation.
template<bool IsNegation>
int MatchSetInst<IsNegation>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    if (!IsNegation)
    {
        PRINT_RE_BYTECODE_BEGIN("MatchSet");
        PRINT_MIXIN(SetMixin<false>);
    }
    else
    {
        PRINT_RE_BYTECODE_BEGIN("MatchNegatedSet");
        PRINT_MIXIN(SetMixin<true>);
    }

    PRINT_RE_BYTECODE_MID();
    if (IsNegation)
    {
        PRINT_BYTES(SetMixin<true>);
    }
    else
    {
        PRINT_BYTES(SetMixin<false>);
    }
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // MatchLiteralInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool MatchLiteralInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| Assert(length <= matcher.program->rep.insts.litbufLen - offset); |
| |
| if (length > inputLength - inputOffset) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| const Char *const literalBuffer = matcher.program->rep.insts.litbuf; |
| const Char * literalCurr = literalBuffer + offset; |
| const Char * inputCurr = input + inputOffset; |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (*literalCurr != *inputCurr) |
| { |
| inputOffset++; |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| const Char *const literalEnd = literalCurr + length; |
| literalCurr++; |
| inputCurr++; |
| |
| while (literalCurr < literalEnd) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (*literalCurr != *inputCurr++) |
| { |
| inputOffset = (CharCount)(inputCurr - input); |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| literalCurr++; |
| } |
| |
| inputOffset = (CharCount)(inputCurr - input); |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "MatchLiteral" opcode name and its
// LiteralMixin part (printed with the extra argument `false`) via the PRINT_* macros.
int MatchLiteralInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("MatchLiteral");
    PRINT_MIXIN_ARGS(LiteralMixin, false);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(LiteralMixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // MatchLiteralEquivInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool MatchLiteralEquivInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (length > inputLength - inputOffset) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| const Char *const literalBuffer = matcher.program->rep.insts.litbuf; |
| CharCount literalOffset = offset; |
| const CharCount literalEndOffset = offset + length * CaseInsensitive::EquivClassSize; |
| |
| Assert(literalEndOffset <= matcher.program->rep.insts.litbufLen); |
| CompileAssert(CaseInsensitive::EquivClassSize == 4); |
| |
| do |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (input[inputOffset] != literalBuffer[literalOffset] |
| && input[inputOffset] != literalBuffer[literalOffset + 1] |
| && input[inputOffset] != literalBuffer[literalOffset + 2] |
| && input[inputOffset] != literalBuffer[literalOffset + 3]) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| inputOffset++; |
| literalOffset += CaseInsensitive::EquivClassSize; |
| } |
| while (literalOffset < literalEndOffset); |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "MatchLiteralEquiv" opcode name
// and its LiteralMixin part (printed with the extra argument `true`) via the PRINT_* macros.
int MatchLiteralEquivInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("MatchLiteralEquiv");
    PRINT_MIXIN_ARGS(LiteralMixin, true);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(LiteralMixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // MatchTrieInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool MatchTrieInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (!trie.Match( |
| input |
| , inputLength |
| , inputOffset |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| , matcher.stats |
| #endif |
| )) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| void MatchTrieInst::FreeBody(ArenaAllocator* rtAllocator) |
| { |
| trie.FreeBody(rtAllocator); |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "MatchTrie" opcode name and its
// TrieMixin part via the PRINT_* macros.
int MatchTrieInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("MatchTrie");
    PRINT_MIXIN(TrieMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(TrieMixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // OptMatchCharInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool OptMatchCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset < inputLength && input[inputOffset] == c) |
| { |
| inputOffset++; |
| } |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "OptMatchChar" opcode name and its
// CharMixin part via the PRINT_* macros.
int OptMatchCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("OptMatchChar");
    PRINT_MIXIN(CharMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(CharMixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // OptMatchSetInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool OptMatchSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset < inputLength && set.Get(input[inputOffset])) |
| { |
| inputOffset++; |
| } |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "OptMatchSet" opcode name and its
// SetMixin<false> part via the PRINT_* macros.
int OptMatchSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("OptMatchSet");
    PRINT_MIXIN(SetMixin<false>);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(SetMixin<false>);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // SyncToCharAndContinueInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool SyncToCharAndContinueInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| const Char matchC = c; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| while (inputOffset < inputLength && input[inputOffset] != matchC) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| inputOffset++; |
| } |
| |
| matchStart = inputOffset; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "SyncToCharAndContinue" opcode
// name and its CharMixin part via the PRINT_* macros.
int SyncToCharAndContinueInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("SyncToCharAndContinue");
    PRINT_MIXIN(CharMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(CharMixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // SyncToChar2SetAndContinueInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool SyncToChar2SetAndContinueInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| const Char matchC0 = cs[0]; |
| const Char matchC1 = cs[1]; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| while (inputOffset < inputLength && input[inputOffset] != matchC0 && input[inputOffset] != matchC1) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| inputOffset++; |
| } |
| |
| matchStart = inputOffset; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "SyncToChar2SetAndContinue" opcode
// name and its Char2Mixin part via the PRINT_* macros.
int SyncToChar2SetAndContinueInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("SyncToChar2SetAndContinue");
    PRINT_MIXIN(Char2Mixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(Char2Mixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // SyncToSetAndContinueInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| template<bool IsNegation> |
| inline bool SyncToSetAndContinueInst<IsNegation>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| const RuntimeCharSet<Char>& matchSet = this->set; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| |
| while (inputOffset < inputLength && matchSet.Get(input[inputOffset]) == IsNegation) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| inputOffset++; |
| } |
| |
| matchStart = inputOffset; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits "SyncToNegatedSetAndContinue" or
// "SyncToSetAndContinue" with the matching SetMixin<true/false> part, selected by IsNegation.
template<bool IsNegation>
int SyncToSetAndContinueInst<IsNegation>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    if (!IsNegation)
    {
        PRINT_RE_BYTECODE_BEGIN("SyncToSetAndContinue");
        PRINT_MIXIN(SetMixin<false>);
    }
    else
    {
        PRINT_RE_BYTECODE_BEGIN("SyncToNegatedSetAndContinue");
        PRINT_MIXIN(SetMixin<true>);
    }

    PRINT_RE_BYTECODE_MID();
    if (IsNegation)
    {
        PRINT_BYTES(SetMixin<true>);
    }
    else
    {
        PRINT_BYTES(SetMixin<false>);
    }
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // SyncToLiteralAndContinueInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| template <typename ScannerT> |
| inline bool SyncToLiteralAndContinueInstT<ScannerT>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (!this->Match(matcher, input, inputLength, inputOffset)) |
| { |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| matchStart = inputOffset; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // explicit instantiation |
| template struct SyncToLiteralAndContinueInstT<Char2LiteralScannerMixin>; |
| template struct SyncToLiteralAndContinueInstT<ScannerMixin>; |
| template struct SyncToLiteralAndContinueInstT<ScannerMixin_WithLinearCharMap>; |
| template struct SyncToLiteralAndContinueInstT<EquivScannerMixin>; |
| template struct SyncToLiteralAndContinueInstT<EquivTrivialLastPatCharScannerMixin>; |
| |
| // Explicitly define each of these 5 Print functions so that the output will show the actual template param mixin and |
| // actual opcode name, even though the logic is basically the same in each definition. See notes below. |
| |
| template <> |
| int SyncToLiteralAndContinueInstT<Char2LiteralScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| // NOTE: this text is unique to this instantiation |
| PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndContinueInstT<Char2LiteralScannerMixin> aka SyncToChar2LiteralAndContinue"); |
| PRINT_MIXIN(Char2LiteralScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(Char2LiteralScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT) |
| PRINT_RE_BYTECODE_END(); |
| } |
| |
| template <> |
| int SyncToLiteralAndContinueInstT<ScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| // NOTE: this text is unique to this instantiation |
| PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndContinueInstT<ScannerMixin> aka SyncToLiteralAndContinue"); |
| PRINT_MIXIN(ScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(ScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT) |
| PRINT_RE_BYTECODE_END(); |
| } |
| |
| template <> |
| int SyncToLiteralAndContinueInstT<ScannerMixin_WithLinearCharMap>::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| // NOTE: this text is unique to this instantiation |
| PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndContinueInstT<ScannerMixin_WithLinearCharMap> aka SyncToLinearLiteralAndContinue"); |
| PRINT_MIXIN(ScannerMixin_WithLinearCharMap); // NOTE: would work with template <typename ScannerT> ScannerT::Print |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(ScannerMixin_WithLinearCharMap); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT) |
| PRINT_RE_BYTECODE_END(); |
| } |
| |
| template <> |
| int SyncToLiteralAndContinueInstT<EquivScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| // NOTE: this text is unique to this instantiation |
| PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndContinueInstT<EquivScannerMixin> aka SyncToLiteralEquivAndContinue"); |
| PRINT_MIXIN(EquivScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(EquivScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT) |
| PRINT_RE_BYTECODE_END(); |
| } |
| |
| template <> |
| int SyncToLiteralAndContinueInstT<EquivTrivialLastPatCharScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| // NOTE: this text is unique to this instantiation |
| PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndContinueInstT<EquivTrivialLastPatCharScannerMixin> aka SyncToLiteralEquivTrivialLastPatCharAndContinue"); |
| PRINT_MIXIN(EquivTrivialLastPatCharScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(EquivTrivialLastPatCharScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT) |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // SyncToCharAndConsumeInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool SyncToCharAndConsumeInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| const Char matchC = c; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| while (inputOffset < inputLength && input[inputOffset] != matchC) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| inputOffset++; |
| } |
| |
| if (inputOffset >= inputLength) |
| { |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| matchStart = inputOffset++; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "SyncToCharAndConsume" opcode name
// and its CharMixin part via the PRINT_* macros.
int SyncToCharAndConsumeInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("SyncToCharAndConsume");
    PRINT_MIXIN(CharMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(CharMixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // SyncToChar2SetAndConsumeInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool SyncToChar2SetAndConsumeInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| const Char matchC0 = cs[0]; |
| const Char matchC1 = cs[1]; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| while (inputOffset < inputLength && (input[inputOffset] != matchC0 && input[inputOffset] != matchC1)) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| inputOffset++; |
| } |
| |
| if (inputOffset >= inputLength) |
| { |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| matchStart = inputOffset++; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits the "SyncToChar2SetAndConsume" opcode
// name and its Char2Mixin part via the PRINT_* macros.
int SyncToChar2SetAndConsumeInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("SyncToChar2SetAndConsume");
    PRINT_MIXIN(Char2Mixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(Char2Mixin);
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // SyncToSetAndConsumeInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| template<bool IsNegation> |
| inline bool SyncToSetAndConsumeInst<IsNegation>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| const RuntimeCharSet<Char>& matchSet = this->set; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| while (inputOffset < inputLength && matchSet.Get(input[inputOffset]) == IsNegation) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| inputOffset++; |
| } |
| |
| if (inputOffset >= inputLength) |
| { |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| matchStart = inputOffset++; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug printer (only built with ENABLE_REGEX_CONFIG_OPTIONS): emits "SyncToNegatedSetAndConsume" or
// "SyncToSetAndConsume" with the matching SetMixin<true/false> part, selected by IsNegation.
template<bool IsNegation>
int SyncToSetAndConsumeInst<IsNegation>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    if (!IsNegation)
    {
        PRINT_RE_BYTECODE_BEGIN("SyncToSetAndConsume");
        PRINT_MIXIN(SetMixin<false>);
    }
    else
    {
        PRINT_RE_BYTECODE_BEGIN("SyncToNegatedSetAndConsume");
        PRINT_MIXIN(SetMixin<true>);
    }

    PRINT_RE_BYTECODE_MID();
    if (IsNegation)
    {
        PRINT_BYTES(SetMixin<true>);
    }
    else
    {
        PRINT_BYTES(SetMixin<false>);
    }
    PRINT_RE_BYTECODE_END();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // SyncToLiteralAndConsumeInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| template <typename ScannerT> |
| inline bool SyncToLiteralAndConsumeInstT<ScannerT>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (!this->Match(matcher, input, inputLength, inputOffset)) |
| { |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| matchStart = inputOffset; |
| inputOffset += ScannerT::GetLiteralLength(); |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Explicit instantiation of SyncToLiteralAndConsumeInstT for each of the five scanner mixins; the Print member
// of every one of these instantiations is explicitly specialized further down.
template struct SyncToLiteralAndConsumeInstT<Char2LiteralScannerMixin>;
template struct SyncToLiteralAndConsumeInstT<ScannerMixin>;
template struct SyncToLiteralAndConsumeInstT<ScannerMixin_WithLinearCharMap>;
template struct SyncToLiteralAndConsumeInstT<EquivScannerMixin>;
template struct SyncToLiteralAndConsumeInstT<EquivTrivialLastPatCharScannerMixin>;
| |
| // Explicitly define each of these 5 Print functions so that the output will show the actual template param mixin and |
| // actual opcode name, even though the logic is basically the same in each definition. See notes below. |
| |
// Debug-only bytecode printer for the Char2LiteralScannerMixin instantiation: passes the instantiation-specific
// opcode text to PRINT_RE_BYTECODE_BEGIN, prints the scanner mixin via PRINT_MIXIN, then dumps it via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
template <>
int SyncToLiteralAndConsumeInstT<Char2LiteralScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    // NOTE: this text is unique to this instantiation
    PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndConsumeInstT<Char2LiteralScannerMixin> aka SyncToChar2LiteralAndConsume");
    PRINT_MIXIN(Char2LiteralScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(Char2LiteralScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT)
    PRINT_RE_BYTECODE_END();
}
| |
// Debug-only bytecode printer for the ScannerMixin instantiation: passes the instantiation-specific opcode text
// to PRINT_RE_BYTECODE_BEGIN, prints the scanner mixin via PRINT_MIXIN, then dumps it via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
template <>
int SyncToLiteralAndConsumeInstT<ScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    // NOTE: this text is unique to this instantiation
    PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndConsumeInstT<ScannerMixin> aka SyncToLiteralAndConsume");
    PRINT_MIXIN(ScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(ScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT)
    PRINT_RE_BYTECODE_END();
}
| |
// Debug-only bytecode printer for the ScannerMixin_WithLinearCharMap instantiation: passes the
// instantiation-specific opcode text to PRINT_RE_BYTECODE_BEGIN, prints the scanner mixin via PRINT_MIXIN,
// then dumps it via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
template <>
int SyncToLiteralAndConsumeInstT<ScannerMixin_WithLinearCharMap>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    // NOTE: this text is unique to this instantiation
    PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndConsumeInstT<ScannerMixin_WithLinearCharMap> aka SyncToLinearLiteralAndConsume");
    PRINT_MIXIN(ScannerMixin_WithLinearCharMap); // NOTE: would work with template <typename ScannerT> ScannerT::Print
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(ScannerMixin_WithLinearCharMap); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT)
    PRINT_RE_BYTECODE_END();
}
| |
// Debug-only bytecode printer for the EquivScannerMixin instantiation: passes the instantiation-specific opcode
// text to PRINT_RE_BYTECODE_BEGIN, prints the scanner mixin via PRINT_MIXIN, then dumps it via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
template <>
int SyncToLiteralAndConsumeInstT<EquivScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    // NOTE: this text is unique to this instantiation
    PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndConsumeInstT<EquivScannerMixin> aka SyncToLiteralEquivAndConsume");
    PRINT_MIXIN(EquivScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(EquivScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT)
    PRINT_RE_BYTECODE_END();
}
| |
// Debug-only bytecode printer for the EquivTrivialLastPatCharScannerMixin instantiation: passes the
// instantiation-specific opcode text to PRINT_RE_BYTECODE_BEGIN, prints the scanner mixin via PRINT_MIXIN,
// then dumps it via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
template <>
int SyncToLiteralAndConsumeInstT<EquivTrivialLastPatCharScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    // NOTE: this text is unique to this instantiation
    PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndConsumeInstT<EquivTrivialLastPatCharScannerMixin> aka SyncToLiteralEquivTrivialLastPatCharAndConsume");
    PRINT_MIXIN(EquivTrivialLastPatCharScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(EquivTrivialLastPatCharScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT)
    PRINT_RE_BYTECODE_END();
}
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // SyncToCharAndBackupInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool SyncToCharAndBackupInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (backup.lower > inputLength - matchStart) |
| { |
| // Even match at very end doesn't allow for minimum backup |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| if (inputOffset < nextSyncInputOffset) |
| { |
| // We have not yet reached the offset in the input we last synced to before backing up, so it's unnecessary to sync |
| // again since we'll sync to the same point in the input and back up to the same place we are at now |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| if (backup.lower > inputOffset - matchStart) |
| { |
| // No use looking for match until minimum backup is possible |
| inputOffset = matchStart + backup.lower; |
| } |
| |
| const Char matchC = c; |
| while (inputOffset < inputLength && input[inputOffset] != matchC) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| inputOffset++; |
| } |
| |
| if (inputOffset >= inputLength) |
| { |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| nextSyncInputOffset = inputOffset + 1; |
| |
| if (backup.upper != CharCountFlag) |
| { |
| // Backup at most by backup.upper for new start |
| CharCount maxBackup = inputOffset - matchStart; |
| matchStart = inputOffset - min(maxBackup, (CharCount)backup.upper); |
| } |
| // else: leave start where it is |
| |
| // Move input to new match start |
| inputOffset = matchStart; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Debug-only bytecode printer (compiled under ENABLE_REGEX_CONFIG_OPTIONS): passes the opcode name
// "SyncToCharAndBackup" to PRINT_RE_BYTECODE_BEGIN, prints the CharMixin and BackupMixin operands, then dumps
// both mixins via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
int SyncToCharAndBackupInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("SyncToCharAndBackup");
    PRINT_MIXIN_COMMA(CharMixin);
    PRINT_MIXIN(BackupMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(CharMixin);
    PRINT_BYTES(BackupMixin);
    PRINT_RE_BYTECODE_END();
}
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // SyncToSetAndBackupInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| template<bool IsNegation> |
| inline bool SyncToSetAndBackupInst<IsNegation>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (backup.lower > inputLength - matchStart) |
| { |
| // Even match at very end doesn't allow for minimum backup |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| if (inputOffset < nextSyncInputOffset) |
| { |
| // We have not yet reached the offset in the input we last synced to before backing up, so it's unnecessary to sync |
| // again since we'll sync to the same point in the input and back up to the same place we are at now |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| if (backup.lower > inputOffset - matchStart) |
| { |
| // No use looking for match until minimum backup is possible |
| inputOffset = matchStart + backup.lower; |
| } |
| |
| const RuntimeCharSet<Char>& matchSet = this->set; |
| while (inputOffset < inputLength && matchSet.Get(input[inputOffset]) == IsNegation) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| inputOffset++; |
| } |
| |
| if (inputOffset >= inputLength) |
| { |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| nextSyncInputOffset = inputOffset + 1; |
| |
| if (backup.upper != CharCountFlag) |
| { |
| // Backup at most by backup.upper for new start |
| CharCount maxBackup = inputOffset - matchStart; |
| matchStart = inputOffset - min(maxBackup, (CharCount)backup.upper); |
| } |
| // else: leave start where it is |
| |
| // Move input to new match start |
| inputOffset = matchStart; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Debug-only bytecode printer for SyncToSetAndBackupInst (compiled under ENABLE_REGEX_CONFIG_OPTIONS).
// The opcode name handed to PRINT_RE_BYTECODE_BEGIN and the SetMixin instantiation that is printed both
// depend on the IsNegation template argument; the BackupMixin operand follows in either case.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
template<bool IsNegation>
int SyncToSetAndBackupInst<IsNegation>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    if (IsNegation)
    {
        PRINT_RE_BYTECODE_BEGIN("SyncToNegatedSetAndBackup");
        PRINT_MIXIN_COMMA(SetMixin<true>);
    }
    else
    {
        PRINT_RE_BYTECODE_BEGIN("SyncToSetAndBackup");
        PRINT_MIXIN_COMMA(SetMixin<false>);
    }

    PRINT_MIXIN(BackupMixin);
    PRINT_RE_BYTECODE_MID();
    // PRINT_BYTES is invoked for whichever SetMixin instantiation matches IsNegation
    IsNegation ? PRINT_BYTES(SetMixin<true>) : PRINT_BYTES(SetMixin<false>);
    PRINT_BYTES(BackupMixin);
    PRINT_RE_BYTECODE_END();
}
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // SyncToLiteralAndBackupInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| template <typename ScannerT> |
| inline bool SyncToLiteralAndBackupInstT<ScannerT>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (backup.lower > inputLength - matchStart) |
| { |
| // Even match at very end doesn't allow for minimum backup |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| if(inputOffset < nextSyncInputOffset) |
| { |
| // We have not yet reached the offset in the input we last synced to before backing up, so it's unnecessary to sync |
| // again since we'll sync to the same point in the input and back up to the same place we are at now |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| if (backup.lower > inputOffset - matchStart) |
| { |
| // No use looking for match until minimum backup is possible |
| inputOffset = matchStart + backup.lower; |
| } |
| |
| if (!this->Match(matcher, input, inputLength, inputOffset)) |
| { |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| nextSyncInputOffset = inputOffset + 1; |
| |
| if (backup.upper != CharCountFlag) |
| { |
| // Set new start at most backup.upper from start of literal |
| CharCount maxBackup = inputOffset - matchStart; |
| matchStart = inputOffset - min(maxBackup, (CharCount)backup.upper); |
| } |
| // else: leave start where it is |
| |
| // Move input to new match start |
| inputOffset = matchStart; |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Explicit instantiation of SyncToLiteralAndBackupInstT for each of the five scanner mixins; the Print member
// of every one of these instantiations is explicitly specialized further down.
template struct SyncToLiteralAndBackupInstT<Char2LiteralScannerMixin>;
template struct SyncToLiteralAndBackupInstT<ScannerMixin>;
template struct SyncToLiteralAndBackupInstT<ScannerMixin_WithLinearCharMap>;
template struct SyncToLiteralAndBackupInstT<EquivScannerMixin>;
template struct SyncToLiteralAndBackupInstT<EquivTrivialLastPatCharScannerMixin>;
| |
| // Explicitly define each of these 5 Print functions so that the output will show the actual template param mixin and |
| // actual opcode name, even though the logic is basically the same in each definition. See notes below. |
| |
// Debug-only bytecode printer for the Char2LiteralScannerMixin instantiation: passes the instantiation-specific
// opcode text to PRINT_RE_BYTECODE_BEGIN, prints the scanner and BackupMixin operands, then dumps both via
// PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
template <>
int SyncToLiteralAndBackupInstT<Char2LiteralScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    // NOTE: this text is unique to this instantiation
    PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndBackupInstT<Char2LiteralScannerMixin> aka SyncToChar2LiteralAndBackup");
    PRINT_MIXIN_COMMA(Char2LiteralScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print
    PRINT_MIXIN(BackupMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(Char2LiteralScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT)
    PRINT_BYTES(BackupMixin);
    PRINT_RE_BYTECODE_END();
}
| |
// Debug-only bytecode printer for the ScannerMixin instantiation: passes the instantiation-specific opcode text
// to PRINT_RE_BYTECODE_BEGIN, prints the scanner and BackupMixin operands, then dumps both via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
template <>
int SyncToLiteralAndBackupInstT<ScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    // NOTE: this text is unique to this instantiation
    PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndBackupInstT<ScannerMixin> aka SyncToLiteralAndBackup");
    PRINT_MIXIN_COMMA(ScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print
    PRINT_MIXIN(BackupMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(ScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT)
    PRINT_BYTES(BackupMixin);
    PRINT_RE_BYTECODE_END();
}
| |
// Debug-only bytecode printer for the ScannerMixin_WithLinearCharMap instantiation: passes the
// instantiation-specific opcode text to PRINT_RE_BYTECODE_BEGIN, prints the scanner and BackupMixin operands,
// then dumps both via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
template <>
int SyncToLiteralAndBackupInstT<ScannerMixin_WithLinearCharMap>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    // NOTE: this text is unique to this instantiation
    PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndBackupInstT<ScannerMixin_WithLinearCharMap> aka SyncToLinearLiteralAndBackup");
    PRINT_MIXIN_COMMA(ScannerMixin_WithLinearCharMap); // NOTE: would work with template <typename ScannerT> ScannerT::Print
    PRINT_MIXIN(BackupMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(ScannerMixin_WithLinearCharMap); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT)
    PRINT_BYTES(BackupMixin);
    PRINT_RE_BYTECODE_END();
}
| |
// Debug-only bytecode printer for the EquivScannerMixin instantiation: passes the instantiation-specific opcode
// text to PRINT_RE_BYTECODE_BEGIN, prints the scanner and BackupMixin operands, then dumps both via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
template <>
int SyncToLiteralAndBackupInstT<EquivScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    // NOTE: this text is unique to this instantiation
    PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndBackupInstT<EquivScannerMixin> aka SyncToLiteralEquivAndBackup");
    PRINT_MIXIN_COMMA(EquivScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print
    PRINT_MIXIN(BackupMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(EquivScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT)
    PRINT_BYTES(BackupMixin);
    PRINT_RE_BYTECODE_END();
}
| |
// Debug-only bytecode printer for the EquivTrivialLastPatCharScannerMixin instantiation: passes the
// instantiation-specific opcode text to PRINT_RE_BYTECODE_BEGIN, prints the scanner and BackupMixin operands,
// then dumps both via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
template <>
int SyncToLiteralAndBackupInstT<EquivTrivialLastPatCharScannerMixin>::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    // NOTE: this text is unique to this instantiation
    PRINT_RE_BYTECODE_BEGIN("SyncToLiteralAndBackupInstT<EquivTrivialLastPatCharScannerMixin> aka SyncToLiteralEquivTrivialLastPatCharAndBackup");
    PRINT_MIXIN_COMMA(EquivTrivialLastPatCharScannerMixin); // NOTE: would work with template <typename ScannerT> ScannerT::Print
    PRINT_MIXIN(BackupMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(EquivTrivialLastPatCharScannerMixin); // NOTE: unique because macro expansion and _u(#InstType) happen before template is evaluated (so text would be ScannerT)
    PRINT_BYTES(BackupMixin);
    PRINT_RE_BYTECODE_END();
}
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // SyncToLiteralsAndBackupInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool SyncToLiteralsAndBackupInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (backup.lower > inputLength - matchStart) |
| { |
| // Even match at very end doesn't allow for minimum backup |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| if (inputOffset < nextSyncInputOffset) |
| { |
| // We have not yet reached the offset in the input we last synced to before backing up, so it's unnecessary to sync |
| // again since we'll sync to the same point in the input and back up to the same place we are at now |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| if (backup.lower > inputOffset - matchStart) |
| { |
| // No use looking for match until minimum backup is possible |
| inputOffset = matchStart + backup.lower; |
| } |
| |
| int besti = -1; |
| CharCount bestMatchOffset = 0; |
| |
| if (matcher.literalNextSyncInputOffsets == nullptr) |
| { |
| Assert(numLiterals <= MaxNumSyncLiterals); |
| matcher.literalNextSyncInputOffsets = |
| RecyclerNewArrayLeaf(matcher.recycler, CharCount, ScannersMixin::MaxNumSyncLiterals); |
| } |
| CharCount* literalNextSyncInputOffsets = matcher.literalNextSyncInputOffsets; |
| |
| if (firstIteration) |
| { |
| for (int i = 0; i < numLiterals; i++) |
| { |
| literalNextSyncInputOffsets[i] = inputOffset; |
| } |
| } |
| |
| for (int i = 0; i < numLiterals; i++) |
| { |
| CharCount thisMatchOffset = literalNextSyncInputOffsets[i]; |
| if (inputOffset > thisMatchOffset) |
| { |
| thisMatchOffset = inputOffset; |
| } |
| |
| if (infos[i]->isEquivClass |
| ? (infos[i]->scanner.Match<CaseInsensitive::EquivClassSize>( |
| input |
| , inputLength |
| , thisMatchOffset |
| , matcher.program->rep.insts.litbuf + infos[i]->offset |
| , infos[i]->length |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| , matcher.stats |
| #endif |
| )) |
| : (infos[i]->scanner.Match<1>( |
| input |
| , inputLength |
| , thisMatchOffset |
| , matcher.program->rep.insts.litbuf + infos[i]->offset |
| , infos[i]->length |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| , matcher.stats |
| #endif |
| ))) |
| { |
| if (besti < 0 || thisMatchOffset < bestMatchOffset) |
| { |
| besti = i; |
| bestMatchOffset = thisMatchOffset; |
| } |
| |
| literalNextSyncInputOffsets[i] = thisMatchOffset; |
| } |
| else |
| { |
| literalNextSyncInputOffsets[i] = inputLength; |
| } |
| } |
| |
| if (besti < 0) |
| { |
| // No literals matched |
| return matcher.HardFail(HARDFAIL_PARAMETERS(HardFailMode::ImmediateFail)); |
| } |
| |
| nextSyncInputOffset = bestMatchOffset + 1; |
| |
| if (backup.upper != CharCountFlag) |
| { |
| // Set new start at most backup.upper from start of literal |
| CharCount maxBackup = bestMatchOffset - matchStart; |
| matchStart = bestMatchOffset - min(maxBackup, (CharCount)backup.upper); |
| } |
| // else: leave start where it is |
| |
| // Move input to new match start |
| inputOffset = matchStart; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Debug-only bytecode printer (compiled under ENABLE_REGEX_CONFIG_OPTIONS): passes the opcode name
// "SyncToLiteralsAndBackup" to PRINT_RE_BYTECODE_BEGIN, prints the ScannersMixin and BackupMixin operands,
// then dumps both mixins via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
int SyncToLiteralsAndBackupInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("SyncToLiteralsAndBackup");
    PRINT_MIXIN_COMMA(ScannersMixin);
    PRINT_MIXIN(BackupMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(ScannersMixin);
    PRINT_BYTES(BackupMixin);
    PRINT_RE_BYTECODE_END();
}
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // MatchGroupInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool MatchGroupInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| GroupInfo* const info = matcher.GroupIdToGroupInfo(groupId); |
| if (!info->IsUndefined() && info->length > 0) |
| { |
| if (info->length > inputLength - inputOffset) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| CharCount groupOffset = info->offset; |
| const CharCount groupEndOffset = groupOffset + info->length; |
| |
| bool isCaseInsensitiveMatch = (matcher.program->flags & IgnoreCaseRegexFlag) != 0; |
| bool isCodePointList = (matcher.program->flags & UnicodeRegexFlag) != 0; |
| |
| // This is the only place in the runtime machinery we need to convert characters to their equivalence class |
| if (isCaseInsensitiveMatch && isCodePointList) |
| { |
| auto getNextCodePoint = [=](CharCount &offset, CharCount endOffset, codepoint_t &codePoint) { |
| if (endOffset <= offset) |
| { |
| return false; |
| } |
| |
| Char lowerPart = input[offset]; |
| if (!Js::NumberUtilities::IsSurrogateLowerPart(lowerPart) || offset + 1 == endOffset) |
| { |
| codePoint = lowerPart; |
| offset += 1; |
| return true; |
| } |
| |
| Char upperPart = input[offset + 1]; |
| if (!Js::NumberUtilities::IsSurrogateUpperPart(upperPart)) |
| { |
| codePoint = lowerPart; |
| offset += 1; |
| } |
| else |
| { |
| codePoint = Js::NumberUtilities::SurrogatePairAsCodePoint(lowerPart, upperPart); |
| offset += 2; |
| } |
| |
| return true; |
| }; |
| |
| codepoint_t equivs[CaseInsensitive::EquivClassSize]; |
| while (true) |
| { |
| codepoint_t groupCodePoint; |
| bool hasGroupCodePoint = getNextCodePoint(groupOffset, groupEndOffset, groupCodePoint); |
| if (!hasGroupCodePoint) |
| { |
| break; |
| } |
| |
| // We don't need to verify that there is a valid input code point since at the beginning |
| // of the function, we make sure that the length of the input is at least as long as the |
| // length of the group. |
| codepoint_t inputCodePoint; |
| getNextCodePoint(inputOffset, inputLength, inputCodePoint); |
| |
| bool doesMatch = false; |
| if (!Js::NumberUtilities::IsInSupplementaryPlane(groupCodePoint)) |
| { |
| auto toCanonical = [&](codepoint_t c) { |
| return matcher.standardChars->ToCanonical( |
| CaseInsensitive::MappingSource::CaseFolding, |
| static_cast<char16>(c)); |
| }; |
| doesMatch = (toCanonical(groupCodePoint) == toCanonical(inputCodePoint)); |
| } |
| else |
| { |
| uint tblidx = 0; |
| uint acth = 0; |
| CaseInsensitive::RangeToEquivClass(tblidx, groupCodePoint, groupCodePoint, acth, equivs); |
| CompileAssert(CaseInsensitive::EquivClassSize == 4); |
| doesMatch = |
| inputCodePoint == equivs[0] |
| || inputCodePoint == equivs[1] |
| || inputCodePoint == equivs[2] |
| || inputCodePoint == equivs[3]; |
| } |
| |
| if (!doesMatch) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| } |
| } |
| else if (isCaseInsensitiveMatch) |
| { |
| do |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| auto toCanonical = [&](CharCount &offset) { |
| return matcher.standardChars->ToCanonical(CaseInsensitive::MappingSource::UnicodeData, input[offset++]); |
| }; |
| |
| if (toCanonical(groupOffset) != toCanonical(inputOffset)) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| } |
| while (groupOffset < groupEndOffset); |
| } |
| else |
| { |
| do |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (input[groupOffset++] != input[inputOffset++]) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| } |
| while (groupOffset < groupEndOffset); |
| } |
| } |
| // else: trivially match empty string |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Debug-only bytecode printer (compiled under ENABLE_REGEX_CONFIG_OPTIONS): passes the opcode name "MatchGroup"
// to PRINT_RE_BYTECODE_BEGIN, prints the GroupMixin operand, then dumps it via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
int MatchGroupInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("MatchGroup");
    PRINT_MIXIN(GroupMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(GroupMixin);
    PRINT_RE_BYTECODE_END();
}
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // BeginDefineGroupInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool BeginDefineGroupInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| GroupInfo *const groupInfo = matcher.GroupIdToGroupInfo(groupId); |
| Assert(groupInfo->IsUndefined()); |
| groupInfo->offset = inputOffset; |
| Assert(groupInfo->IsUndefined()); |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Debug-only bytecode printer (compiled under ENABLE_REGEX_CONFIG_OPTIONS): passes the opcode name
// "BeginDefineGroup" to PRINT_RE_BYTECODE_BEGIN, prints the GroupMixin operand, then dumps it via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
int BeginDefineGroupInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("BeginDefineGroup");
    PRINT_MIXIN(GroupMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(GroupMixin);
    PRINT_RE_BYTECODE_END();
}
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // EndDefineGroupInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool EndDefineGroupInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (!noNeedToSave) |
| { |
| // UNDO ACTION: Restore group on backtrack |
| PUSH(contStack, ResetGroupCont, groupId); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| } |
| |
| GroupInfo *const groupInfo = matcher.GroupIdToGroupInfo(groupId); |
| Assert(groupInfo->IsUndefined()); |
| Assert(inputOffset >= groupInfo->offset); |
| groupInfo->length = inputOffset - groupInfo->offset; |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Debug-only bytecode printer (compiled under ENABLE_REGEX_CONFIG_OPTIONS): passes the opcode name
// "EndDefineGroup" to PRINT_RE_BYTECODE_BEGIN, prints the GroupMixin and NoNeedToSaveMixin operands, then dumps
// both mixins via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
int EndDefineGroupInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("EndDefineGroup");
    PRINT_MIXIN_COMMA(GroupMixin);
    PRINT_MIXIN(NoNeedToSaveMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(GroupMixin);
    PRINT_BYTES(NoNeedToSaveMixin);
    PRINT_RE_BYTECODE_END();
}
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // DefineGroupFixedInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool DefineGroupFixedInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (!noNeedToSave) |
| { |
| // UNDO ACTION: Restore group on backtrack |
| PUSH(contStack, ResetGroupCont, groupId); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| } |
| |
| GroupInfo *const groupInfo = matcher.GroupIdToGroupInfo(groupId); |
| Assert(groupInfo->IsUndefined()); |
| groupInfo->offset = inputOffset - length; |
| groupInfo->length = length; |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Debug-only bytecode printer (compiled under ENABLE_REGEX_CONFIG_OPTIONS): passes the opcode name
// "DefineGroupFixed" to PRINT_RE_BYTECODE_BEGIN, prints the GroupMixin, FixedLengthMixin and NoNeedToSaveMixin
// operands, then dumps all three mixins via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
int DefineGroupFixedInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("DefineGroupFixed");
    PRINT_MIXIN_COMMA(GroupMixin);
    PRINT_MIXIN_COMMA(FixedLengthMixin);
    PRINT_MIXIN(NoNeedToSaveMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(GroupMixin);
    PRINT_BYTES(FixedLengthMixin);
    PRINT_BYTES(NoNeedToSaveMixin);
    PRINT_RE_BYTECODE_END();
}
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // BeginLoopInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool BeginLoopInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(loopId); |
| |
| // If loop has outer loops, the continuation stack may have choicepoints from an earlier "run" of this loop |
| // which, when backtracked to, may expect the loopInfo state to be as it was at the time the choicepoint was |
| // pushed. |
| // - If the loop is greedy with deterministic body, there may be Resumes into the follow of the loop, but |
| // they won't look at the loopInfo state so there's nothing to do. |
| // - If the loop is greedy, or if it is non-greedy with lower > 0, AND it has a non-deterministic body, |
| // we may have Resume entries which will resume inside the loop body, which may then run to a |
| // RepeatLoop, which will then look at the loopInfo state. However, each iteration is protected by |
| // a RestoreLoop by RepeatLoopInst below. (****) |
| // - If the loop is non-greedy there may be a RepeatLoop on the stack, so we must restore the loopInfo |
| // state before backtracking to it. |
| if (!isGreedy && hasOuterLoops) |
| { |
| PUSH(contStack, RestoreLoopCont, loopId, *loopInfo, matcher); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| } |
| |
| // The loop body must always begin with empty inner groups |
| // - if the loop is not in an outer they will be empty due to the reset when the match began |
| // - if the loop is in an outer loop, they will have been reset by the outer loop's RepeatLoop instruction |
| #if DBG |
| for (int i = minBodyGroupId; i <= maxBodyGroupId; i++) |
| { |
| Assert(matcher.GroupIdToGroupInfo(i)->IsUndefined()); |
| } |
| #endif |
| |
| loopInfo->number = 0; |
| loopInfo->startInputOffset = inputOffset; |
| |
| if (repeats.lower == 0) |
| { |
| if (isGreedy) |
| { |
| // CHOICEPOINT: Try one iteration of body, if backtrack continue from here with no iterations |
| PUSH(contStack, ResumeCont, inputOffset, exitLabel); |
| instPointer += sizeof(*this); |
| } |
| else |
| { |
| // CHOICEPOINT: Try no iterations of body, if backtrack do one iteration of body from here |
| Assert(instPointer == (uint8*)this); |
| PUSH(contStack, RepeatLoopCont, matcher.InstPointerToLabel(instPointer), inputOffset); |
| instPointer = matcher.LabelToInstPointer(exitLabel); |
| } |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| } |
| else |
| { |
| // Must match minimum iterations, so continue to loop body |
| instPointer += sizeof(*this); |
| } |
| |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Debug-only bytecode printer (compiled under ENABLE_REGEX_CONFIG_OPTIONS): passes the opcode name "BeginLoop"
// to PRINT_RE_BYTECODE_BEGIN, prints the BeginLoopMixin, BodyGroupsMixin and GreedyMixin operands, then dumps
// all three mixins via PRINT_BYTES.
// NOTE(review): the int result comes from PRINT_RE_BYTECODE_END (defined outside this chunk) -- confirm its meaning.
int BeginLoopInst::Print(DebugWriter* w, Label label, const Char* litbuf) const
{
    PRINT_RE_BYTECODE_BEGIN("BeginLoop");
    PRINT_MIXIN_COMMA(BeginLoopMixin);
    PRINT_MIXIN_COMMA(BodyGroupsMixin);
    PRINT_MIXIN(GreedyMixin);
    PRINT_RE_BYTECODE_MID();
    PRINT_BYTES(BeginLoopMixin);
    PRINT_BYTES(BodyGroupsMixin);
    PRINT_BYTES(GreedyMixin);
    PRINT_RE_BYTECODE_END();
}
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RepeatLoopInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool RepeatLoopInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| BeginLoopInst* begin = matcher.L2I(BeginLoop, beginLabel); |
| LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId); |
| |
| // See comment (****) above. |
| if (begin->hasInnerNondet) |
| { |
| PUSH(contStack, RestoreLoopCont, begin->loopId, *loopInfo, matcher); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| } |
| |
| loopInfo->number++; |
| |
| if (loopInfo->number < begin->repeats.lower) |
| { |
| // Must match another iteration of body. |
| loopInfo->startInputOffset = inputOffset; |
| if(begin->hasInnerNondet) |
| { |
| // If it backtracks into the loop body of an earlier iteration, it must restore inner groups for that iteration. |
| // Save the inner groups and reset them for the next iteration. |
| matcher.SaveInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId, true, input, contStack); |
| } |
| else |
| { |
| // If it backtracks, the entire loop will fail, so no need to restore groups. Just reset the inner groups for |
| // the next iteration. |
| matcher.ResetInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId); |
| } |
| instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopInst)); |
| } |
| else if (inputOffset == loopInfo->startInputOffset && loopInfo->number > begin->repeats.lower) |
| { |
| // The minimum number of iterations has been satisfied but the last iteration made no progress. |
| // - With greedy & deterministic body, FAIL so as to undo that iteration and restore group bindings. |
| // - With greedy & non-deterministic body, FAIL so as to try another body alternative |
| // - With non-greedy, we're trying an additional iteration because the follow failed. But |
| // since we didn't consume anything the follow will fail again, so fail |
| // |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| else if (begin->repeats.upper != CharCountFlag && loopInfo->number >= (CharCount)begin->repeats.upper) |
| { |
| // Success: proceed to remainder. |
| instPointer = matcher.LabelToInstPointer(begin->exitLabel); |
| } |
| else if (begin->isGreedy) |
| { |
| // CHOICEPOINT: Try one more iteration of body, if backtrack continue from here with no more iterations |
| PUSH(contStack, ResumeCont, inputOffset, begin->exitLabel); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| loopInfo->startInputOffset = inputOffset; |
| |
| // If backtrack, we must continue with previous group bindings |
| matcher.SaveInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId, true, input, contStack); |
| instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopInst)); |
| } |
| else |
| { |
| // CHOICEPOINT: Try no more iterations of body, if backtrack do one more iteration of body from here |
| PUSH(contStack, RepeatLoopCont, beginLabel, inputOffset); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| instPointer = matcher.LabelToInstPointer(begin->exitLabel); |
| } |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the RepeatLoop instruction: runs the PRINT_* macro sequence for the "RepeatLoop" header, |
| // RepeatLoopMixin (PRINT_MIXIN), the mid marker, RepeatLoopMixin again via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int RepeatLoopInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("RepeatLoop"); |
| PRINT_MIXIN(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // BeginLoopIfCharInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // Entry test for a loop that is only entered when the next input character equals 'c'. |
| // - Next character is 'c': zero the loop's iteration counter and advance to the instruction after this one. |
| // - Otherwise: fail if at least one iteration is mandatory (repeats.lower > 0), else jump to exitLabel. |
| // Returns false on the non-failing paths; on failure returns whatever matcher.Fail returns. |
| inline bool BeginLoopIfCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|     matcher.CompStats(); |
| #endif |
|     const bool bodyCanStart = inputOffset < inputLength && input[inputOffset] == c; |
|     if (!bodyCanStart) |
|     { |
|         if (repeats.lower > 0) |
|         { |
|             // A mandatory iteration cannot begin at this position. |
|             return matcher.Fail(FAIL_PARAMETERS); |
|         } |
| |
|         // Zero iterations are acceptable: skip the loop entirely. |
|         instPointer = matcher.LabelToInstPointer(exitLabel); |
|         return false; |
|     } |
| |
|     // Committed to at least one iteration of the loop body. |
|     LoopInfo* const info = matcher.LoopIdToLoopInfo(loopId); |
| |
| #if DBG |
|     // Every group inside the body is expected to be unbound on entry. |
|     for (int bodyGroup = minBodyGroupId; bodyGroup <= maxBodyGroupId; bodyGroup++) |
|     { |
|         Assert(matcher.GroupIdToGroupInfo(bodyGroup)->IsUndefined()); |
|     } |
| #endif |
| |
|     info->number = 0; |
|     instPointer += sizeof(*this); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the BeginLoopIfChar instruction: runs the PRINT_* macro sequence for the "BeginLoopIfChar" |
| // header, then CharMixin, BeginLoopMixin and BodyGroupsMixin (PRINT_MIXIN*), the mid marker, the same three |
| // mixins via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int BeginLoopIfCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("BeginLoopIfChar"); |
| PRINT_MIXIN_COMMA(CharMixin); |
| PRINT_MIXIN_COMMA(BeginLoopMixin); |
| PRINT_MIXIN(BodyGroupsMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(CharMixin); |
| PRINT_BYTES(BeginLoopMixin); |
| PRINT_BYTES(BodyGroupsMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // BeginLoopIfSetInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // Entry test for a loop that is only entered when the next input character is a member of 'set'. |
| // - Next character is in the set: record the start offset, zero the iteration counter and advance to the |
| //   instruction after this one. |
| // - Otherwise: fail if at least one iteration is mandatory (repeats.lower > 0), else jump to exitLabel. |
| // Returns false on the non-failing paths; on failure returns whatever matcher.Fail returns. |
| inline bool BeginLoopIfSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|     matcher.CompStats(); |
| #endif |
|     const bool bodyCanStart = inputOffset < inputLength && set.Get(input[inputOffset]); |
|     if (!bodyCanStart) |
|     { |
|         if (repeats.lower > 0) |
|         { |
|             // A mandatory iteration cannot begin at this position. |
|             return matcher.Fail(FAIL_PARAMETERS); |
|         } |
| |
|         // Zero iterations are acceptable: skip the loop entirely. |
|         instPointer = matcher.LabelToInstPointer(exitLabel); |
|         return false; |
|     } |
| |
|     // Committed to at least one iteration of the loop body. |
|     LoopInfo* const info = matcher.LoopIdToLoopInfo(loopId); |
| |
| #if DBG |
|     // Every group inside the body is expected to be unbound on entry. |
|     for (int bodyGroup = minBodyGroupId; bodyGroup <= maxBodyGroupId; bodyGroup++) |
|     { |
|         Assert(matcher.GroupIdToGroupInfo(bodyGroup)->IsUndefined()); |
|     } |
| #endif |
| |
|     info->startInputOffset = inputOffset; |
|     info->number = 0; |
|     instPointer += sizeof(*this); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the BeginLoopIfSet instruction: runs the PRINT_* macro sequence for the "BeginLoopIfSet" |
| // header, then SetMixin<false>, BeginLoopMixin and BodyGroupsMixin (PRINT_MIXIN*), the mid marker, the same |
| // three mixins via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int BeginLoopIfSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("BeginLoopIfSet"); |
| PRINT_MIXIN_COMMA(SetMixin<false>); |
| PRINT_MIXIN_COMMA(BeginLoopMixin); |
| PRINT_MIXIN(BodyGroupsMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(SetMixin<false>); |
| PRINT_BYTES(BeginLoopMixin); |
| PRINT_BYTES(BodyGroupsMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RepeatLoopIfCharInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // End-of-iteration step for a loop opened by the BeginLoopIfCharInst found at beginLabel. |
| // Increments the iteration counter, then looks at the next input character: |
| // - it equals begin->c and the upper bound is not reached: prepare the inner groups and run the body again; |
| // - it equals begin->c but the upper bound is reached: fail; |
| // - it differs: fail if fewer than repeats.lower iterations completed, otherwise continue at exitLabel. |
| // Returns false on the non-failing paths; on failure returns whatever matcher.Fail returns. |
| inline bool RepeatLoopIfCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     BeginLoopIfCharInst* loopBegin = matcher.L2I(BeginLoopIfChar, beginLabel); |
|     LoopInfo* info = matcher.LoopIdToLoopInfo(loopBegin->loopId); |
| |
|     if (loopBegin->hasInnerNondet) |
|     { |
|         // The body may have left choicepoints behind, so matching can backtrack into the iteration that just |
|         // finished. Push a copy of the loop state (taken before the increment below) for that case. |
|         PUSH(contStack, RestoreLoopCont, loopBegin->loopId, *info, matcher); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
| |
|     info->number++; |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|     matcher.CompStats(); |
| #endif |
|     const bool bodyCanStart = inputOffset < inputLength && input[inputOffset] == loopBegin->c; |
|     if (!bodyCanStart) |
|     { |
|         if (info->number < loopBegin->repeats.lower) |
|         { |
|             // Not enough iterations and another one cannot begin here. |
|             return matcher.Fail(FAIL_PARAMETERS); |
|         } |
| |
|         // Enough iterations: leave the loop. |
|         instPointer = matcher.LabelToInstPointer(loopBegin->exitLabel); |
|         return false; |
|     } |
| |
|     const bool atUpperBound = |
|         loopBegin->repeats.upper != CharCountFlag && info->number >= static_cast<CharCount>(loopBegin->repeats.upper); |
|     if (atUpperBound) |
|     { |
|         // The next character belongs to the body's first set. If that set is disjoint from the loop's follow |
|         // set, the follow cannot match here either, so fail straight away. |
|         return matcher.Fail(FAIL_PARAMETERS); |
|     } |
| |
|     // Commit to one more iteration. |
|     if (loopBegin->hasInnerNondet) |
|     { |
|         // Backtracking into an earlier iteration must see that iteration's group bindings: save the inner |
|         // groups and reset them for the next iteration. |
|         matcher.SaveInnerGroups(loopBegin->minBodyGroupId, loopBegin->maxBodyGroupId, true, input, contStack); |
|     } |
|     else |
|     { |
|         // Any backtrack fails the whole loop, so nothing needs restoring: just reset the inner groups. |
|         matcher.ResetInnerGroups(loopBegin->minBodyGroupId, loopBegin->maxBodyGroupId); |
|     } |
|     instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopIfCharInst)); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the RepeatLoopIfChar instruction: runs the PRINT_* macro sequence for the "RepeatLoopIfChar" |
| // header, RepeatLoopMixin (PRINT_MIXIN), the mid marker, RepeatLoopMixin via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int RepeatLoopIfCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("RepeatLoopIfChar"); |
| PRINT_MIXIN(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RepeatLoopIfSetInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // End-of-iteration step for a loop opened by the BeginLoopIfSetInst found at beginLabel. |
| // Increments the iteration counter, then looks at the next input character: |
| // - it is in begin->set and the upper bound is not reached: prepare the inner groups and run the body again; |
| // - it is in begin->set but the upper bound is reached: fail; |
| // - it is not in the set: fail if fewer than repeats.lower iterations completed, else continue at exitLabel. |
| // Returns false on the non-failing paths; on failure returns whatever matcher.Fail returns. |
| inline bool RepeatLoopIfSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     BeginLoopIfSetInst* loopBegin = matcher.L2I(BeginLoopIfSet, beginLabel); |
|     LoopInfo* info = matcher.LoopIdToLoopInfo(loopBegin->loopId); |
| |
|     if (loopBegin->hasInnerNondet) |
|     { |
|         // The body may have left choicepoints behind, so matching can backtrack into the iteration that just |
|         // finished. Push a copy of the loop state (taken before the increment below) for that case. |
|         PUSH(contStack, RestoreLoopCont, loopBegin->loopId, *info, matcher); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
| |
|     info->number++; |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|     matcher.CompStats(); |
| #endif |
|     const bool bodyCanStart = inputOffset < inputLength && loopBegin->set.Get(input[inputOffset]); |
|     if (!bodyCanStart) |
|     { |
|         if (info->number < loopBegin->repeats.lower) |
|         { |
|             // Not enough iterations and another one cannot begin here. |
|             return matcher.Fail(FAIL_PARAMETERS); |
|         } |
| |
|         // Enough iterations: leave the loop. |
|         instPointer = matcher.LabelToInstPointer(loopBegin->exitLabel); |
|         return false; |
|     } |
| |
|     const bool atUpperBound = |
|         loopBegin->repeats.upper != CharCountFlag && info->number >= static_cast<CharCount>(loopBegin->repeats.upper); |
|     if (atUpperBound) |
|     { |
|         // The next character belongs to the body's first set. If that set is disjoint from the loop's follow |
|         // set, the follow cannot match here either, so fail straight away. |
|         return matcher.Fail(FAIL_PARAMETERS); |
|     } |
| |
|     // Commit to one more iteration. |
|     if (loopBegin->hasInnerNondet) |
|     { |
|         // Backtracking into an earlier iteration must see that iteration's group bindings: save the inner |
|         // groups and reset them for the next iteration. |
|         matcher.SaveInnerGroups(loopBegin->minBodyGroupId, loopBegin->maxBodyGroupId, true, input, contStack); |
|     } |
|     else |
|     { |
|         // Any backtrack fails the whole loop, so nothing needs restoring: just reset the inner groups. |
|         matcher.ResetInnerGroups(loopBegin->minBodyGroupId, loopBegin->maxBodyGroupId); |
|     } |
|     instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopIfSetInst)); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the RepeatLoopIfSet instruction: runs the PRINT_* macro sequence for the "RepeatLoopIfSet" |
| // header, RepeatLoopMixin (PRINT_MIXIN), the mid marker, RepeatLoopMixin via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int RepeatLoopIfSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("RepeatLoopIfSet"); |
| PRINT_MIXIN(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // BeginLoopFixedInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // Opens a BeginLoopFixed/RepeatLoopFixed loop: resets the loop's counter, records where the loop started, |
| // optionally pushes continuations, and advances to the instruction after this one (the loop body). |
| // Always returns false. |
| inline bool BeginLoopFixedInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     LoopInfo* const info = matcher.LoopIdToLoopInfo(loopId); |
| |
|     if (hasOuterLoops) |
|     { |
|         // When nested in an outer loop, the continuation stack may already hold a RewindLoopFixed entry for |
|         // this loop from an earlier pass; keep a copy of the current loop state so backtracking restores it. |
|         PUSH(contStack, RestoreLoopCont, loopId, *info, matcher); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
| |
|     // startInputOffset is written once here and not by RepeatLoopFixedInst; per the original note, the rewind |
|     // position is worked out later from it together with the iteration count (rewind code is not in this section). |
|     info->startInputOffset = inputOffset; |
|     info->number = 0; |
| |
|     if (repeats.lower == 0) |
|     { |
|         // CHOICEPOINT: try one iteration of the body; if the body fails, rewind the input to here and resume |
|         // with the follow. |
|         Assert(instPointer == (uint8*)this); |
|         PUSH(contStack, RewindLoopFixedCont, matcher.InstPointerToLabel(instPointer), true); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
|     // Otherwise the minimum number of iterations is mandatory, so a failing body fails the entire loop and no |
|     // continuation is needed yet. |
| |
|     instPointer += sizeof(*this); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the BeginLoopFixed instruction: runs the PRINT_* macro sequence for the "BeginLoopFixed" |
| // header, then BeginLoopMixin and FixedLengthMixin (PRINT_MIXIN*), the mid marker, the same two mixins via |
| // PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int BeginLoopFixedInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("BeginLoopFixed"); |
| PRINT_MIXIN_COMMA(BeginLoopMixin); |
| PRINT_MIXIN(FixedLengthMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(BeginLoopMixin); |
| PRINT_BYTES(FixedLengthMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RepeatLoopFixedInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // End-of-iteration step for a loop opened by the BeginLoopFixedInst found at beginLabel. |
| // Increments the iteration counter and then either: |
| // - loops back to the body because the minimum count is not yet met, |
| // - loops back to the body as an optional extra iteration (pushing a rewind continuation the first time), or |
| // - continues at exitLabel because the maximum count has been reached. |
| // Always returns false. |
| inline bool RepeatLoopFixedInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     BeginLoopFixedInst* loopBegin = matcher.L2I(BeginLoopFixed, beginLabel); |
|     LoopInfo* info = matcher.LoopIdToLoopInfo(loopBegin->loopId); |
| |
|     info->number++; |
| |
|     if (info->number < loopBegin->repeats.lower) |
|     { |
|         // Another mandatory iteration is required; a failing body fails the entire loop. |
|         instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopFixedInst)); |
|         return false; |
|     } |
| |
|     const bool reachedUpperBound = |
|         loopBegin->repeats.upper != CharCountFlag && info->number >= static_cast<CharCount>(loopBegin->repeats.upper); |
|     if (!reachedUpperBound) |
|     { |
|         // CHOICEPOINT: try one more iteration of the body; if it fails, rewind the input to here and try the |
|         // follow. |
|         if (info->number == loopBegin->repeats.lower) |
|         { |
|             // Reaching this point with number == lower implies lower > 0, in which case BeginLoopFixedInst |
|             // did not push the rewind continuation, so push it now. |
|             PUSH(contStack, RewindLoopFixedCont, beginLabel, true); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|             matcher.PushStats(contStack, input); |
| #endif |
|         } |
|         instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopFixedInst)); |
|         return false; |
|     } |
| |
|     // Maximum number of iterations matched: continue with the follow. |
|     if (loopBegin->repeats.lower < loopBegin->repeats.upper) |
|     { |
|         // If the follow fails we retry with one fewer iteration (subject to repeats.lower). The body is |
|         // expected to leave no continuations of its own, so the rewind continuation must still be on top of |
|         // the stack (asserted below); flip it from "trying body" to "trying follow". |
|         Cont *topCont = contStack.Top(); |
|         Assert(topCont != 0); |
|         Assert(topCont->tag == Cont::ContTag::RewindLoopFixed); |
|         RewindLoopFixedCont* rewindCont = (RewindLoopFixedCont*)topCont; |
|         rewindCont->tryingBody = false; |
|     } |
|     // When lower == upper no rewind continuation was ever pushed. |
| |
|     instPointer = matcher.LabelToInstPointer(loopBegin->exitLabel); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the RepeatLoopFixed instruction: runs the PRINT_* macro sequence for the "RepeatLoopFixed" |
| // header, RepeatLoopMixin (PRINT_MIXIN), the mid marker, RepeatLoopMixin via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int RepeatLoopFixedInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("RepeatLoopFixed"); |
| PRINT_MIXIN(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // LoopSetInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // Single-instruction loop over a character set: consumes as many consecutive set members as possible |
| // (capped by repeats.upper and the end of input), stores the count in the loop info, and then |
| // - fails if fewer than repeats.lower characters were consumed, |
| // - pushes a RewindLoopSetCont if more than repeats.lower were consumed (so the follow can retry with fewer), |
| // - and finally advances to the instruction after this one (the follow). |
| // Returns false on the non-failing paths; on failure returns whatever matcher.Fail returns. |
| inline bool LoopSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     LoopInfo* const info = matcher.LoopIdToLoopInfo(loopId); |
| |
|     if (hasOuterLoops) |
|     { |
|         // When nested in an outer loop, the continuation stack may already hold a rewind entry for this loop |
|         // from an earlier pass; keep a copy of the current loop state so backtracking restores it. |
|         PUSH(contStack, RestoreLoopCont, loopId, *info, matcher); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
| |
|     // The start offset is fixed for the whole loop; together with the count it identifies how far was consumed. |
|     info->startInputOffset = inputOffset; |
| |
|     const RuntimeCharSet<Char>& charSet = this->set; |
|     const CharCount firstOffset = inputOffset; |
| |
|     // Scan limit: end of input, unless the upper bound stops us earlier. Casting repeats.upper to CharCount |
|     // happens before the comparison, exactly as the bound is used in the addition below. |
|     const CharCount maxCount = static_cast<CharCount>(repeats.upper); |
|     CharCount scanLimit = inputLength; |
|     if (maxCount < inputLength - inputOffset) |
|     { |
|         scanLimit = inputOffset + maxCount; |
|     } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|     matcher.CompStats(); |
| #endif |
|     // Consume as many set members as possible. |
|     for (; inputOffset < scanLimit && charSet.Get(input[inputOffset]); inputOffset++) |
|     { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.CompStats(); |
| #endif |
|     } |
| |
|     info->number = inputOffset - firstOffset; |
|     if (info->number < repeats.lower) |
|     { |
|         return matcher.Fail(FAIL_PARAMETERS); |
|     } |
| |
|     if (info->number > repeats.lower) |
|     { |
|         // CHOICEPOINT: if the follow fails, retry having consumed one fewer character. |
|         Assert(instPointer == (uint8*)this); |
|         PUSH(contStack, RewindLoopSetCont, matcher.InstPointerToLabel(instPointer)); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
|     // With exactly repeats.lower characters consumed, a failing follow fails the entire loop. |
| |
|     // Continue with the follow. |
|     instPointer += sizeof(*this); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the LoopSet instruction: runs the PRINT_* macro sequence for the "LoopSetInst" header, then |
| // SetMixin<false> and BeginLoopBasicsMixin (PRINT_MIXIN*), the mid marker, the same two mixins via |
| // PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int LoopSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("LoopSetInst"); |
| PRINT_MIXIN_COMMA(SetMixin<false>); |
| PRINT_MIXIN(BeginLoopBasicsMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(SetMixin<false>); |
| PRINT_BYTES(BeginLoopBasicsMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // Variant of the set loop that also records where the follow's first character occurs. |
| // Consumes as many consecutive members of 'set' as possible (capped by repeats.upper and the end of input); |
| // for every consumed character equal to followFirst, its offset relative to the loop start is added to |
| // loopInfo->offsetsOfFollowFirst. Afterwards: |
| // - fails if fewer than repeats.lower characters were consumed, |
| // - pushes a RewindLoopSetWithFollowFirstCont if more than repeats.lower were consumed, |
| // - and finally advances to the instruction after this one (the follow). |
| // Returns false on the non-failing paths; on failure returns whatever matcher.Fail returns. |
| inline bool LoopSetWithFollowFirstInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     LoopInfo* const info = matcher.LoopIdToLoopInfo(loopId); |
| |
|     if (hasOuterLoops) |
|     { |
|         // When nested in an outer loop, the continuation stack may already hold a rewind entry for this loop |
|         // from an earlier pass; keep a copy of the current loop state so backtracking restores it. |
|         PUSH(contStack, RestoreLoopCont, loopId, *info, matcher); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
| |
|     // Drop offsets recorded by a previous execution of this loop, if any. |
|     if (info->offsetsOfFollowFirst) |
|     { |
|         info->offsetsOfFollowFirst->Clear(); |
|     } |
| |
|     // The start offset is fixed for the whole loop; together with the count it identifies how far was consumed. |
|     info->startInputOffset = inputOffset; |
| |
|     const RuntimeCharSet<Char>& charSet = this->set; |
|     const CharCount firstOffset = inputOffset; |
| |
|     // Scan limit: end of input, unless the upper bound stops us earlier. |
|     const CharCount maxCount = static_cast<CharCount>(repeats.upper); |
|     CharCount scanLimit = inputLength; |
|     if (maxCount < inputLength - inputOffset) |
|     { |
|         scanLimit = inputOffset + maxCount; |
|     } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|     matcher.CompStats(); |
| #endif |
|     // Consume as many set members as possible, noting each occurrence of followFirst on the way. |
|     for (; inputOffset < scanLimit && charSet.Get(input[inputOffset]); inputOffset++) |
|     { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.CompStats(); |
| #endif |
|         if (input[inputOffset] == this->followFirst) |
|         { |
|             info->EnsureOffsetsOfFollowFirst(matcher); |
|             info->offsetsOfFollowFirst->Add(inputOffset - info->startInputOffset); |
|         } |
|     } |
| |
|     info->number = inputOffset - firstOffset; |
|     if (info->number < repeats.lower) |
|     { |
|         return matcher.Fail(FAIL_PARAMETERS); |
|     } |
| |
|     if (info->number > repeats.lower) |
|     { |
|         // CHOICEPOINT: if the follow fails, retry having consumed fewer characters. |
|         Assert(instPointer == (uint8*)this); |
|         PUSH(contStack, RewindLoopSetWithFollowFirstCont, matcher.InstPointerToLabel(instPointer)); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
|     // With exactly repeats.lower characters consumed, a failing follow fails the entire loop. |
| |
|     // Continue with the follow. |
|     instPointer += sizeof(*this); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the LoopSetWithFollowFirst instruction: runs the PRINT_* macro sequence for the |
| // "LoopSetWithFollowFirstInst" header, then SetMixin<false>, BeginLoopBasicsMixin and FollowFirstMixin |
| // (PRINT_MIXIN*), the mid marker, the same three mixins via PRINT_BYTES, and the end marker. |
| // The section after PRINT_RE_BYTECODE_MID uses PRINT_BYTES for every mixin, matching the other Print |
| // methods in this file; FollowFirstMixin previously went through PRINT_MIXIN a second time there. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int LoopSetWithFollowFirstInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
|     PRINT_RE_BYTECODE_BEGIN("LoopSetWithFollowFirstInst"); |
|     PRINT_MIXIN_COMMA(SetMixin<false>); |
|     PRINT_MIXIN_COMMA(BeginLoopBasicsMixin); |
|     PRINT_MIXIN(FollowFirstMixin); |
|     PRINT_RE_BYTECODE_MID(); |
|     PRINT_BYTES(SetMixin<false>); |
|     PRINT_BYTES(BeginLoopBasicsMixin); |
|     PRINT_BYTES(FollowFirstMixin); |
|     PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // BeginLoopFixedGroupLastIterationInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // Opens a BeginLoopFixedGroupLastIteration/RepeatLoopFixedGroupLastIteration loop: resets the loop's counter, |
| // records where the loop started, optionally pushes continuations (loop-state restore, group reset, rewind), |
| // and advances to the instruction after this one (the loop body). The group identified by groupId is |
| // asserted to be unbound on entry. Always returns false. |
| inline bool BeginLoopFixedGroupLastIterationInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     Assert(matcher.GroupIdToGroupInfo(groupId)->IsUndefined()); |
| |
|     LoopInfo* const info = matcher.LoopIdToLoopInfo(loopId); |
| |
|     if (hasOuterLoops) |
|     { |
|         // When nested in an outer loop, the continuation stack may already hold a |
|         // RewindLoopFixedGroupLastIteration entry for this loop from an earlier pass; keep a copy of the |
|         // current loop state so backtracking restores it. |
|         PUSH(contStack, RestoreLoopCont, loopId, *info, matcher); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
| |
|     if (!noNeedToSave) |
|     { |
|         // Inside an outer loop or assertion the group binding has to be undone if we backtrack all the way |
|         // out of this loop. |
|         PUSH(contStack, ResetGroupCont, groupId); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
| |
|     // startInputOffset is written once here and not by the matching Repeat instruction; per the original note, |
|     // the rewind position is worked out later from it together with the iteration count (rewind code is not in |
|     // this section). |
|     info->startInputOffset = inputOffset; |
|     info->number = 0; |
| |
|     if (repeats.lower == 0) |
|     { |
|         // CHOICEPOINT: try one iteration of the body; if the body fails, rewind the input to here and resume |
|         // with the follow. |
|         Assert(instPointer == (uint8*)this); |
|         PUSH(contStack, RewindLoopFixedGroupLastIterationCont, matcher.InstPointerToLabel(instPointer), true); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
|     // Otherwise the minimum number of iterations is mandatory, so a failing body fails the entire loop and no |
|     // continuation is needed yet. |
| |
|     instPointer += sizeof(*this); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the BeginLoopFixedGroupLastIteration instruction: runs the PRINT_* macro sequence for the |
| // "BeginLoopFixedGroupLastIteration" header, then BeginLoopMixin, FixedLengthMixin, GroupMixin and |
| // NoNeedToSaveMixin (PRINT_MIXIN*), the mid marker, the same four mixins via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int BeginLoopFixedGroupLastIterationInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("BeginLoopFixedGroupLastIteration"); |
| PRINT_MIXIN_COMMA(BeginLoopMixin); |
| PRINT_MIXIN_COMMA(FixedLengthMixin); |
| PRINT_MIXIN_COMMA(GroupMixin); |
| PRINT_MIXIN(NoNeedToSaveMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(BeginLoopMixin); |
| PRINT_BYTES(FixedLengthMixin); |
| PRINT_BYTES(GroupMixin); |
| PRINT_BYTES(NoNeedToSaveMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RepeatLoopFixedGroupLastIterationInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // End-of-iteration step for a loop opened by the BeginLoopFixedGroupLastIterationInst found at beginLabel. |
| // Increments the iteration counter and then either: |
| // - loops back to the body because the minimum count is not yet met, |
| // - loops back to the body as an optional extra iteration (pushing a rewind continuation the first time), or |
| // - on reaching the maximum count, binds the group to the last begin->length characters and continues at |
| //   exitLabel. |
| // Always returns false. |
| inline bool RepeatLoopFixedGroupLastIterationInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     BeginLoopFixedGroupLastIterationInst* loopBegin = matcher.L2I(BeginLoopFixedGroupLastIteration, beginLabel); |
|     LoopInfo* info = matcher.LoopIdToLoopInfo(loopBegin->loopId); |
| |
|     info->number++; |
| |
|     if (info->number < loopBegin->repeats.lower) |
|     { |
|         // Another mandatory iteration is required; a failing body fails the entire loop. |
|         instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopFixedGroupLastIterationInst)); |
|         return false; |
|     } |
| |
|     const bool reachedUpperBound = |
|         loopBegin->repeats.upper != CharCountFlag && info->number >= static_cast<CharCount>(loopBegin->repeats.upper); |
|     if (!reachedUpperBound) |
|     { |
|         // CHOICEPOINT: try one more iteration of the body; if it fails, rewind the input to here and try the |
|         // follow. |
|         if (info->number == loopBegin->repeats.lower) |
|         { |
|             // Reaching this point with number == lower implies lower > 0, in which case the Begin instruction |
|             // did not push the rewind continuation, so push it now. |
|             PUSH(contStack, RewindLoopFixedGroupLastIterationCont, beginLabel, true); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|             matcher.PushStats(contStack, input); |
| #endif |
|         } |
|         instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopFixedGroupLastIterationInst)); |
|         return false; |
|     } |
| |
|     // Maximum number of iterations matched: continue with the follow. |
|     if (loopBegin->repeats.lower < loopBegin->repeats.upper) |
|     { |
|         // If the follow fails we retry with one fewer iteration (subject to repeats.lower). The body is |
|         // expected to leave no continuations of its own, so the rewind continuation must still be on top of |
|         // the stack (asserted below); flip it from "trying body" to "trying follow". |
|         Cont *topCont = contStack.Top(); |
|         Assert(topCont != 0); |
|         Assert(topCont->tag == Cont::ContTag::RewindLoopFixedGroupLastIteration); |
|         RewindLoopFixedGroupLastIterationCont* rewindCont = (RewindLoopFixedGroupLastIterationCont*)topCont; |
|         rewindCont->tryingBody = false; |
|     } |
|     // When lower == upper no rewind continuation was ever pushed. |
| |
|     // Bind the group to the text of the final iteration: the begin->length characters ending at inputOffset. |
|     GroupInfo* lastIterationGroup = matcher.GroupIdToGroupInfo(loopBegin->groupId); |
|     lastIterationGroup->offset = inputOffset - loopBegin->length; |
|     lastIterationGroup->length = loopBegin->length; |
| |
|     instPointer = matcher.LabelToInstPointer(loopBegin->exitLabel); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the RepeatLoopFixedGroupLastIteration instruction: runs the PRINT_* macro sequence for the |
| // "RepeatLoopFixedGroupLastIteration" header, RepeatLoopMixin (PRINT_MIXIN), the mid marker, RepeatLoopMixin |
| // via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int RepeatLoopFixedGroupLastIterationInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("RepeatLoopFixedGroupLastIteration"); |
| PRINT_MIXIN(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // BeginGreedyLoopNoBacktrackInst |
| // ---------------------------------------------------------------------- |
| |
| // Opens a greedy loop handled by RepeatGreedyLoopNoBacktrackInst: resets the loop's counter and start offset, |
| // pushes a ResumeCont that continues at exitLabel from the current input position, and advances to the |
| // instruction after this one (the loop body). Always returns false. |
| inline bool BeginGreedyLoopNoBacktrackInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     LoopInfo* const info = matcher.LoopIdToLoopInfo(loopId); |
| |
|     info->startInputOffset = inputOffset; |
|     info->number = 0; |
| |
|     // CHOICEPOINT: try one iteration of the body; on backtrack, continue from here with no iterations. |
|     PUSH(contStack, ResumeCont, inputOffset, exitLabel); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|     matcher.PushStats(contStack, input); |
| #endif |
| |
|     instPointer += sizeof(*this); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the BeginGreedyLoopNoBacktrack instruction: runs the PRINT_* macro sequence for the |
| // "BeginGreedyLoopNoBacktrack" header, GreedyLoopNoBacktrackMixin (PRINT_MIXIN), the mid marker, the same |
| // mixin via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int BeginGreedyLoopNoBacktrackInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("BeginGreedyLoopNoBacktrack"); |
| PRINT_MIXIN(GreedyLoopNoBacktrackMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(GreedyLoopNoBacktrackMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RepeatGreedyLoopNoBacktrackInst |
| // ---------------------------------------------------------------------- |
| |
| // End-of-iteration step for a loop opened by the BeginGreedyLoopNoBacktrackInst found at beginLabel. |
| // Increments the iteration counter. If the iteration consumed no input the match fails; otherwise the |
| // ResumeCont on top of the continuation stack is updated to the current input offset and the body runs again. |
| // Returns false when looping; on failure returns whatever matcher.Fail returns. |
| inline bool RepeatGreedyLoopNoBacktrackInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     BeginGreedyLoopNoBacktrackInst* loopBegin = matcher.L2I(BeginGreedyLoopNoBacktrack, beginLabel); |
|     LoopInfo* info = matcher.LoopIdToLoopInfo(loopBegin->loopId); |
| |
|     info->number++; |
| |
|     if (inputOffset == info->startInputOffset) |
|     { |
|         // The iteration made no progress. |
|         return matcher.Fail(FAIL_PARAMETERS); |
|     } |
| |
|     // CHOICEPOINT: try one more iteration of the body; on backtrack, continue from here with no more |
|     // iterations. The body is deterministic and group free, so it left no continuation records and the |
|     // Resume continuation is still on top of the stack: simply move it to the current input offset. |
|     Cont* topCont = contStack.Top(); |
|     Assert(topCont != 0 && topCont->tag == Cont::ContTag::Resume); |
|     ResumeCont* resumeCont = (ResumeCont*)topCont; |
|     resumeCont->origInputOffset = inputOffset; |
| |
|     info->startInputOffset = inputOffset; |
|     instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginGreedyLoopNoBacktrackInst)); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the RepeatGreedyLoopNoBacktrack instruction: runs the PRINT_* macro sequence for the |
| // "RepeatGreedyLoopNoBacktrack" header, RepeatLoopMixin (PRINT_MIXIN), the mid marker, RepeatLoopMixin via |
| // PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| int RepeatGreedyLoopNoBacktrackInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("RepeatGreedyLoopNoBacktrack"); |
| PRINT_MIXIN(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(RepeatLoopMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ChompCharInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // Consumes a maximal run of the character 'c' starting at inputOffset, with no continuation pushed. |
| // - Mode == ChompMode::Star: the run may be empty; never fails. |
| // - Any other mode: at least one 'c' is required, otherwise the match fails. |
| // On success advances to the instruction after this one and returns false; on failure returns whatever |
| // matcher.Fail returns. |
| template<ChompMode Mode> |
| inline bool ChompCharInst<Mode>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     const Char wanted = c; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|     matcher.CompStats(); |
| #endif |
|     if (Mode != ChompMode::Star && !(inputOffset < inputLength && input[inputOffset] == wanted)) |
|     { |
|         // A first character is mandatory in this mode and it is not there. |
|         return matcher.Fail(FAIL_PARAMETERS); |
|     } |
| |
|     for (;;) |
|     { |
|         if (Mode != ChompMode::Star) |
|         { |
|             // The character at inputOffset has already been verified; step over it before testing the next. |
|             ++inputOffset; |
|         } |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.CompStats(); |
| #endif |
|         if (!(inputOffset < inputLength && input[inputOffset] == wanted)) |
|         { |
|             break; |
|         } |
|         if (Mode == ChompMode::Star) |
|         { |
|             // In Star mode the character is stepped over only after it has been tested. |
|             ++inputOffset; |
|         } |
|     } |
| |
|     instPointer += sizeof(*this); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the ChompChar instruction. The header text is "ChompChar<Star>" when Mode is ChompMode::Star |
| // and "ChompChar<Plus>" for any other mode; it is followed by CharMixin (PRINT_MIXIN), the mid marker, |
| // CharMixin via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| template<ChompMode Mode> |
| int ChompCharInst<Mode>::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| if (Mode == ChompMode::Star) |
| { |
| PRINT_RE_BYTECODE_BEGIN("ChompChar<Star>"); |
| } |
| else |
| { |
| PRINT_RE_BYTECODE_BEGIN("ChompChar<Plus>"); |
| } |
| |
| PRINT_MIXIN(CharMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(CharMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ChompSetInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // Consumes a maximal run of characters belonging to 'set' starting at inputOffset, with no continuation |
| // pushed. |
| // - Mode == ChompMode::Star: the run may be empty; never fails. |
| // - Any other mode: at least one set member is required, otherwise the match fails. |
| // On success advances to the instruction after this one and returns false; on failure returns whatever |
| // matcher.Fail returns. |
| template<ChompMode Mode> |
| inline bool ChompSetInst<Mode>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     const RuntimeCharSet<Char>& charSet = this->set; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|     matcher.CompStats(); |
| #endif |
|     if (Mode != ChompMode::Star && !(inputOffset < inputLength && charSet.Get(input[inputOffset]))) |
|     { |
|         // A first character is mandatory in this mode and it is not there. |
|         return matcher.Fail(FAIL_PARAMETERS); |
|     } |
| |
|     for (;;) |
|     { |
|         if (Mode != ChompMode::Star) |
|         { |
|             // The character at inputOffset has already been verified; step over it before testing the next. |
|             ++inputOffset; |
|         } |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.CompStats(); |
| #endif |
|         if (!(inputOffset < inputLength && charSet.Get(input[inputOffset]))) |
|         { |
|             break; |
|         } |
|         if (Mode == ChompMode::Star) |
|         { |
|             // In Star mode the character is stepped over only after it has been tested. |
|             ++inputOffset; |
|         } |
|     } |
| |
|     instPointer += sizeof(*this); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the ChompSet instruction. The header text is "ChompSet<Star>" when Mode is ChompMode::Star |
| // and "ChompSet<Plus>" for any other mode; it is followed by SetMixin<false> (PRINT_MIXIN), the mid marker, |
| // SetMixin<false> via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| template<ChompMode Mode> |
| int ChompSetInst<Mode>::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| if (Mode == ChompMode::Star) |
| { |
| PRINT_RE_BYTECODE_BEGIN("ChompSet<Star>"); |
| } |
| else |
| { |
| PRINT_RE_BYTECODE_BEGIN("ChompSet<Plus>"); |
| } |
| |
| PRINT_MIXIN(SetMixin<false>); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(SetMixin<false>); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ChompCharGroupInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // Consumes a maximal run of the character 'c' starting at inputOffset and binds the group groupId to the |
| // consumed span (offset = starting inputOffset, length = number of characters consumed). |
| // - Mode == ChompMode::Star: the run may be empty; never fails. |
| // - Any other mode: at least one 'c' is required, otherwise the match fails and the group is left untouched. |
| // Unless noNeedToSave is set, a ResetGroupCont is pushed before binding so backtracking undoes the binding. |
| // On success advances to the instruction after this one and returns false; on failure returns whatever |
| // matcher.Fail returns. |
| template<ChompMode Mode> |
| inline bool ChompCharGroupInst<Mode>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     Assert(matcher.GroupIdToGroupInfo(groupId)->IsUndefined()); |
| |
|     const CharCount runStart = inputOffset; |
|     const Char wanted = c; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|     matcher.CompStats(); |
| #endif |
|     if (Mode != ChompMode::Star && !(inputOffset < inputLength && input[inputOffset] == wanted)) |
|     { |
|         // A first character is mandatory in this mode and it is not there. |
|         return matcher.Fail(FAIL_PARAMETERS); |
|     } |
| |
|     for (;;) |
|     { |
|         if (Mode != ChompMode::Star) |
|         { |
|             // The character at inputOffset has already been verified; step over it before testing the next. |
|             ++inputOffset; |
|         } |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.CompStats(); |
| #endif |
|         if (!(inputOffset < inputLength && input[inputOffset] == wanted)) |
|         { |
|             break; |
|         } |
|         if (Mode == ChompMode::Star) |
|         { |
|             // In Star mode the character is stepped over only after it has been tested. |
|             ++inputOffset; |
|         } |
|     } |
| |
|     if (!noNeedToSave) |
|     { |
|         // UNDO ACTION: reset the group again if we backtrack past this point. |
|         PUSH(contStack, ResetGroupCont, groupId); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
| |
|     GroupInfo *const capture = matcher.GroupIdToGroupInfo(groupId); |
|     capture->offset = runStart; |
|     capture->length = inputOffset - runStart; |
| |
|     instPointer += sizeof(*this); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // Debug dump of the ChompCharGroup instruction. The header text is "ChompCharGroup<Star>" when Mode is |
| // ChompMode::Star and "ChompCharGroup<Plus>" for any other mode; it is followed by CharMixin, GroupMixin and |
| // NoNeedToSaveMixin (PRINT_MIXIN*), the mid marker, the same three mixins via PRINT_BYTES, and the end marker. |
| // NOTE(review): the PRINT_* macros are defined outside this section; the int result presumably comes from |
| // their expansion -- confirm against the macro definitions. |
| template<ChompMode Mode> |
| int ChompCharGroupInst<Mode>::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| if (Mode == ChompMode::Star) |
| { |
| PRINT_RE_BYTECODE_BEGIN("ChompCharGroup<Star>"); |
| } |
| else |
| { |
| PRINT_RE_BYTECODE_BEGIN("ChompCharGroup<Plus>"); |
| } |
| |
| PRINT_MIXIN_COMMA(CharMixin); |
| PRINT_MIXIN_COMMA(GroupMixin); |
| PRINT_MIXIN(NoNeedToSaveMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(CharMixin); |
| PRINT_BYTES(GroupMixin); |
| PRINT_BYTES(NoNeedToSaveMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ChompSetGroupInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| // Consumes a maximal run of characters belonging to 'set' starting at inputOffset and binds the group |
| // groupId to the consumed span (offset = starting inputOffset, length = number of characters consumed). |
| // - Mode == ChompMode::Star: the run may be empty; never fails. |
| // - Any other mode: at least one set member is required, otherwise the match fails and the group is left |
| //   untouched. |
| // Unless noNeedToSave is set, a ResetGroupCont is pushed before binding so backtracking undoes the binding. |
| // On success advances to the instruction after this one and returns false; on failure returns whatever |
| // matcher.Fail returns. |
| template<ChompMode Mode> |
| inline bool ChompSetGroupInst<Mode>::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
|     Assert(matcher.GroupIdToGroupInfo(groupId)->IsUndefined()); |
| |
|     const CharCount runStart = inputOffset; |
|     const RuntimeCharSet<Char>& charSet = this->set; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|     matcher.CompStats(); |
| #endif |
|     if (Mode != ChompMode::Star && !(inputOffset < inputLength && charSet.Get(input[inputOffset]))) |
|     { |
|         // A first character is mandatory in this mode and it is not there. |
|         return matcher.Fail(FAIL_PARAMETERS); |
|     } |
| |
|     for (;;) |
|     { |
|         if (Mode != ChompMode::Star) |
|         { |
|             // The character at inputOffset has already been verified; step over it before testing the next. |
|             ++inputOffset; |
|         } |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.CompStats(); |
| #endif |
|         if (!(inputOffset < inputLength && charSet.Get(input[inputOffset]))) |
|         { |
|             break; |
|         } |
|         if (Mode == ChompMode::Star) |
|         { |
|             // In Star mode the character is stepped over only after it has been tested. |
|             ++inputOffset; |
|         } |
|     } |
| |
|     if (!noNeedToSave) |
|     { |
|         // UNDO ACTION: reset the group again if we backtrack past this point. |
|         PUSH(contStack, ResetGroupCont, groupId); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
|         matcher.PushStats(contStack, input); |
| #endif |
|     } |
| |
|     GroupInfo *const capture = matcher.GroupIdToGroupInfo(groupId); |
|     capture->offset = runStart; |
|     capture->length = inputOffset - runStart; |
| |
|     instPointer += sizeof(*this); |
|     return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| template<ChompMode Mode> |
| int ChompSetGroupInst<Mode>::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| if (Mode == ChompMode::Star) |
| { |
| PRINT_RE_BYTECODE_BEGIN("ChompSetGroup<Star>"); |
| } |
| else |
| { |
| PRINT_RE_BYTECODE_BEGIN("ChompSetGroup<Plus>"); |
| } |
| |
| PRINT_MIXIN_COMMA(SetMixin<false>); |
| PRINT_MIXIN_COMMA(GroupMixin); |
| PRINT_MIXIN(NoNeedToSaveMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(SetMixin<false>); |
| PRINT_BYTES(GroupMixin); |
| PRINT_BYTES(NoNeedToSaveMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ChompCharBoundedInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool ChompCharBoundedInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| const Char matchC = c; |
| const CharCount loopMatchStart = inputOffset; |
| const CharCountOrFlag repeatsUpper = repeats.upper; |
| const CharCount inputEndOffset = |
| static_cast<CharCount>(repeatsUpper) >= inputLength - inputOffset |
| ? inputLength |
| : inputOffset + static_cast<CharCount>(repeatsUpper); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| while (inputOffset < inputEndOffset && input[inputOffset] == matchC) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| inputOffset++; |
| } |
| |
| if (inputOffset - loopMatchStart < repeats.lower) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Printer for this instruction, compiled only under ENABLE_REGEX_CONFIG_OPTIONS and labelled
// "ChompCharBounded". The body consists solely of the PRINT_* bytecode macros, which are defined
// outside this section: CharMixin and ChompBoundedMixin go to the PRINT_MIXIN* macros first and
// to PRINT_BYTES after PRINT_RE_BYTECODE_MID.
// NOTE(review): there is no explicit return statement; presumably PRINT_RE_BYTECODE_END supplies
// the int result -- confirm against the macro definition.
| int ChompCharBoundedInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("ChompCharBounded"); |
| PRINT_MIXIN_COMMA(CharMixin); |
| PRINT_MIXIN(ChompBoundedMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(CharMixin); |
| PRINT_BYTES(ChompBoundedMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ChompSetBoundedInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool ChompSetBoundedInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| const RuntimeCharSet<Char>& matchSet = this->set; |
| const CharCount loopMatchStart = inputOffset; |
| const CharCountOrFlag repeatsUpper = repeats.upper; |
| const CharCount inputEndOffset = |
| static_cast<CharCount>(repeatsUpper) >= inputLength - inputOffset |
| ? inputLength |
| : inputOffset + static_cast<CharCount>(repeatsUpper); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| while (inputOffset < inputEndOffset && matchSet.Get(input[inputOffset])) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| inputOffset++; |
| } |
| |
| if (inputOffset - loopMatchStart < repeats.lower) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Printer for this instruction, compiled only under ENABLE_REGEX_CONFIG_OPTIONS and labelled
// "ChompSetBounded". The body consists solely of the PRINT_* bytecode macros, which are defined
// outside this section: SetMixin<false> and ChompBoundedMixin go to the PRINT_MIXIN* macros first
// and to PRINT_BYTES after PRINT_RE_BYTECODE_MID.
// NOTE(review): there is no explicit return statement; presumably PRINT_RE_BYTECODE_END supplies
// the int result -- confirm against the macro definition.
| int ChompSetBoundedInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("ChompSetBounded"); |
| PRINT_MIXIN_COMMA(SetMixin<false>); |
| PRINT_MIXIN(ChompBoundedMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(SetMixin<false>); |
| PRINT_BYTES(ChompBoundedMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ChompSetBoundedGroupLastCharInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool ChompSetBoundedGroupLastCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| Assert(matcher.GroupIdToGroupInfo(groupId)->IsUndefined()); |
| |
| const RuntimeCharSet<Char>& matchSet = this->set; |
| const CharCount loopMatchStart = inputOffset; |
| const CharCountOrFlag repeatsUpper = repeats.upper; |
| const CharCount inputEndOffset = |
| static_cast<CharCount>(repeatsUpper) >= inputLength - inputOffset |
| ? inputLength |
| : inputOffset + static_cast<CharCount>(repeatsUpper); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| while (inputOffset < inputEndOffset && matchSet.Get(input[inputOffset])) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| inputOffset++; |
| } |
| |
| if (inputOffset - loopMatchStart < repeats.lower) |
| { |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| if (inputOffset > loopMatchStart) |
| { |
| if (!noNeedToSave) |
| { |
| PUSH(contStack, ResetGroupCont, groupId); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| } |
| |
| GroupInfo *const groupInfo = matcher.GroupIdToGroupInfo(groupId); |
| groupInfo->offset = inputOffset - 1; |
| groupInfo->length = 1; |
| } |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Printer for this instruction, compiled only under ENABLE_REGEX_CONFIG_OPTIONS and labelled
// "ChompSetBoundedGroupLastChar". The body consists solely of the PRINT_* bytecode macros, which
// are defined outside this section: the four mixins go to the PRINT_MIXIN* macros first and to
// PRINT_BYTES, in the same order, after PRINT_RE_BYTECODE_MID.
// NOTE(review): there is no explicit return statement; presumably PRINT_RE_BYTECODE_END supplies
// the int result -- confirm against the macro definition.
| int ChompSetBoundedGroupLastCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("ChompSetBoundedGroupLastChar"); |
| PRINT_MIXIN_COMMA(SetMixin<false>); |
| PRINT_MIXIN_COMMA(ChompBoundedMixin); |
| PRINT_MIXIN_COMMA(GroupMixin); |
| PRINT_MIXIN(NoNeedToSaveMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(SetMixin<false>); |
| PRINT_BYTES(ChompBoundedMixin); |
| PRINT_BYTES(GroupMixin); |
| PRINT_BYTES(NoNeedToSaveMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // TryInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool TryInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| // CHOICEPOINT: Resume at fail label on backtrack |
| PUSH(contStack, ResumeCont, inputOffset, failLabel); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Printer for this instruction, compiled only under ENABLE_REGEX_CONFIG_OPTIONS and labelled
// "Try". The body consists solely of the PRINT_* bytecode macros, which are defined outside this
// section: TryMixin goes to PRINT_MIXIN first and to PRINT_BYTES after PRINT_RE_BYTECODE_MID.
// NOTE(review): there is no explicit return statement; presumably PRINT_RE_BYTECODE_END supplies
// the int result -- confirm against the macro definition.
| int TryInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("Try"); |
| PRINT_MIXIN(TryMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(TryMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // TryIfCharInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool TryIfCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset < inputLength && input[inputOffset] == c) |
| { |
| // CHOICEPOINT: Resume at fail label on backtrack |
| PUSH(contStack, ResumeCont, inputOffset, failLabel); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| // Proceed directly to exit |
| instPointer = matcher.LabelToInstPointer(failLabel); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Printer for this instruction, compiled only under ENABLE_REGEX_CONFIG_OPTIONS and labelled
// "TryIfChar". The body consists solely of the PRINT_* bytecode macros, which are defined outside
// this section: CharMixin and TryMixin go to the PRINT_MIXIN* macros first and to PRINT_BYTES
// after PRINT_RE_BYTECODE_MID.
// NOTE(review): there is no explicit return statement; presumably PRINT_RE_BYTECODE_END supplies
// the int result -- confirm against the macro definition.
| int TryIfCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("TryIfChar"); |
| PRINT_MIXIN_COMMA(CharMixin); |
| PRINT_MIXIN(TryMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(CharMixin); |
| PRINT_BYTES(TryMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // TryMatchCharInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool TryMatchCharInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset < inputLength && input[inputOffset] == c) |
| { |
| // CHOICEPOINT: Resume at fail label on backtrack |
| PUSH(contStack, ResumeCont, inputOffset, failLabel); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| inputOffset++; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| // Proceed directly to exit |
| instPointer = matcher.LabelToInstPointer(failLabel); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Printer for this instruction, compiled only under ENABLE_REGEX_CONFIG_OPTIONS and labelled
// "TryMatchChar". The body consists solely of the PRINT_* bytecode macros, which are defined
// outside this section: CharMixin and TryMixin go to the PRINT_MIXIN* macros first and to
// PRINT_BYTES after PRINT_RE_BYTECODE_MID.
// NOTE(review): there is no explicit return statement; presumably PRINT_RE_BYTECODE_END supplies
// the int result -- confirm against the macro definition.
| int TryMatchCharInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("TryMatchChar"); |
| PRINT_MIXIN_COMMA(CharMixin); |
| PRINT_MIXIN(TryMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(CharMixin); |
| PRINT_BYTES(TryMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // TryIfSetInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool TryIfSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset < inputLength && set.Get(input[inputOffset])) |
| { |
| // CHOICEPOINT: Resume at fail label on backtrack |
| PUSH(contStack, ResumeCont, inputOffset, failLabel); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| // Proceed directly to exit |
| instPointer = matcher.LabelToInstPointer(failLabel); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Printer for this instruction, compiled only under ENABLE_REGEX_CONFIG_OPTIONS and labelled
// "TryIfSet". The body consists solely of the PRINT_* bytecode macros, which are defined outside
// this section: SetMixin<false> and TryMixin go to the PRINT_MIXIN* macros first and to
// PRINT_BYTES after PRINT_RE_BYTECODE_MID.
// NOTE(review): there is no explicit return statement; presumably PRINT_RE_BYTECODE_END supplies
// the int result -- confirm against the macro definition.
| int TryIfSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("TryIfSet"); |
| PRINT_MIXIN_COMMA(SetMixin<false>); |
| PRINT_MIXIN(TryMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(SetMixin<false>); |
| PRINT_BYTES(TryMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // TryMatchSetInst (optimized instruction) |
| // ---------------------------------------------------------------------- |
| |
| inline bool TryMatchSetInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.CompStats(); |
| #endif |
| if (inputOffset < inputLength && set.Get(input[inputOffset])) |
| { |
| // CHOICEPOINT: Resume at fail label on backtrack |
| PUSH(contStack, ResumeCont, inputOffset, failLabel); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| inputOffset++; |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| // Proceed directly to exit |
| instPointer = matcher.LabelToInstPointer(failLabel); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Printer for this instruction, compiled only under ENABLE_REGEX_CONFIG_OPTIONS and labelled
// "TryMatchSet". The body consists solely of the PRINT_* bytecode macros, which are defined
// outside this section: SetMixin<false> and TryMixin go to the PRINT_MIXIN* macros first and to
// PRINT_BYTES after PRINT_RE_BYTECODE_MID.
// NOTE(review): there is no explicit return statement; presumably PRINT_RE_BYTECODE_END supplies
// the int result -- confirm against the macro definition.
| int TryMatchSetInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("TryMatchSet"); |
| PRINT_MIXIN_COMMA(SetMixin<false>); |
| PRINT_MIXIN(TryMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(SetMixin<false>); |
| PRINT_BYTES(TryMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // BeginAssertionInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool BeginAssertionInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| Assert(instPointer == (uint8*)this); |
| |
| if (!isNegation) |
| { |
| // If the positive assertion binds some groups then on success any RestoreGroup continuations pushed |
| // in the assertion body will be cut. Hence if the entire assertion is backtracked over we must restore |
| // the current inner group bindings. |
| matcher.SaveInnerGroups(minBodyGroupId, maxBodyGroupId, false, input, contStack); |
| } |
| |
| PUSHA(assertionStack, AssertionInfo, matcher.InstPointerToLabel(instPointer), inputOffset, contStack.Position()); |
| PUSH(contStack, PopAssertionCont); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.PushStats(contStack, input); |
| #endif |
| |
| instPointer += sizeof(*this); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Printer for this instruction, compiled only under ENABLE_REGEX_CONFIG_OPTIONS and labelled
// "BeginAssertion". The body consists solely of the PRINT_* bytecode macros, which are defined
// outside this section: BodyGroupsMixin, NegationMixin and NextLabelMixin go to the PRINT_MIXIN*
// macros first and to PRINT_BYTES, in the same order, after PRINT_RE_BYTECODE_MID.
// NOTE(review): there is no explicit return statement; presumably PRINT_RE_BYTECODE_END supplies
// the int result -- confirm against the macro definition.
| int BeginAssertionInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("BeginAssertion"); |
| PRINT_MIXIN_COMMA(BodyGroupsMixin); |
| PRINT_MIXIN_COMMA(NegationMixin); |
| PRINT_MIXIN(NextLabelMixin); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(BodyGroupsMixin); |
| PRINT_BYTES(NegationMixin); |
| PRINT_BYTES(NextLabelMixin); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // EndAssertionInst |
| // ---------------------------------------------------------------------- |
| |
| inline bool EndAssertionInst::Exec(REGEX_INST_EXEC_PARAMETERS) const |
| { |
| if (!matcher.PopAssertion(inputOffset, instPointer, contStack, assertionStack, true)) |
| { |
| // Body of negative assertion succeeded, so backtrack |
| return matcher.Fail(FAIL_PARAMETERS); |
| } |
| |
| // else: body of positive assertion succeeded, instruction pointer already at next instruction |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
// Printer for this instruction, compiled only under ENABLE_REGEX_CONFIG_OPTIONS and labelled
// "EndAssertion". The body consists solely of the PRINT_* bytecode macros, which are defined
// outside this section. No PRINT_MIXIN* macro is used here, and PRINT_BYTES receives the
// instruction type itself.
// NOTE(review): there is no explicit return statement; presumably PRINT_RE_BYTECODE_END supplies
// the int result -- confirm against the macro definition.
| int EndAssertionInst::Print(DebugWriter* w, Label label, const Char* litbuf) const |
| { |
| PRINT_RE_BYTECODE_BEGIN("EndAssertion"); |
| PRINT_RE_BYTECODE_MID(); |
| PRINT_BYTES(EndAssertionInst); |
| PRINT_RE_BYTECODE_END(); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // Matcher state |
| // ---------------------------------------------------------------------- |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void LoopInfo::Print(DebugWriter* w) const |
| { |
| w->Print(_u("number: %u, startInputOffset: %u"), number, startInputOffset); |
| } |
| #endif |
| |
| void LoopInfo::EnsureOffsetsOfFollowFirst(Matcher& matcher) |
| { |
| if (this->offsetsOfFollowFirst == nullptr) |
| { |
| this->offsetsOfFollowFirst = JsUtil::List<CharCount, ArenaAllocator>::New(matcher.pattern->library->GetScriptContext()->RegexAllocator()); |
| } |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void GroupInfo::Print(DebugWriter* w, const Char* const input) const |
| { |
| if (IsUndefined()) |
| { |
| w->Print(_u("<undefined> (%u)"), offset); |
| } |
| else |
| { |
| w->PrintQuotedString(input + offset, (CharCount)length); |
| w->Print(_u(" (%u+%u)"), offset, (CharCount)length); |
| } |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void AssertionInfo::Print(DebugWriter* w) const |
| { |
| w->PrintEOL(_u("beginLabel: L%04x, startInputOffset: %u, contStackPosition: $llu"), beginLabel, startInputOffset, static_cast<unsigned long long>(contStackPosition)); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ResumeCont |
| // ---------------------------------------------------------------------- |
| |
| inline bool ResumeCont::Exec(REGEX_CONT_EXEC_PARAMETERS) |
| { |
| inputOffset = origInputOffset; |
| instPointer = matcher.LabelToInstPointer(origInstLabel); |
| return true; // STOP BACKTRACKING |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int ResumeCont::Print(DebugWriter* w, const Char* const input) const |
| { |
| w->PrintEOL(_u("Resume(origInputOffset: %u, origInstLabel: L%04x)"), origInputOffset, origInstLabel); |
| return sizeof(*this); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RestoreLoopCont |
| // ---------------------------------------------------------------------- |
| |
| inline RestoreLoopCont::RestoreLoopCont(int loopId, LoopInfo& origLoopInfo, Matcher& matcher) : Cont(ContTag::RestoreLoop), loopId(loopId) |
| { |
| this->origLoopInfo.number = origLoopInfo.number; |
| this->origLoopInfo.startInputOffset = origLoopInfo.startInputOffset; |
| this->origLoopInfo.offsetsOfFollowFirst = nullptr; |
| if (origLoopInfo.offsetsOfFollowFirst != nullptr) |
| { |
| this->origLoopInfo.offsetsOfFollowFirst = JsUtil::List<CharCount, ArenaAllocator>::New(matcher.pattern->library->GetScriptContext()->RegexAllocator()); |
| this->origLoopInfo.offsetsOfFollowFirst->Copy(origLoopInfo.offsetsOfFollowFirst); |
| } |
| } |
| |
| inline bool RestoreLoopCont::Exec(REGEX_CONT_EXEC_PARAMETERS) |
| { |
| matcher.QueryContinue(qcTicks); |
| |
| *matcher.LoopIdToLoopInfo(loopId) = origLoopInfo; |
| return false; // KEEP BACKTRACKING |
| } |
| |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int RestoreLoopCont::Print(DebugWriter* w, const Char* const input) const |
| { |
| w->Print(_u("RestoreLoop(loopId: %d, "), loopId); |
| origLoopInfo.Print(w); |
| w->PrintEOL(_u(")")); |
| return sizeof(*this); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RestoreGroupCont |
| // ---------------------------------------------------------------------- |
| |
| inline bool RestoreGroupCont::Exec(REGEX_CONT_EXEC_PARAMETERS) |
| { |
| *matcher.GroupIdToGroupInfo(groupId) = origGroupInfo; |
| return false; // KEEP BACKTRACKING |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int RestoreGroupCont::Print(DebugWriter* w, const Char* const input) const |
| { |
| w->Print(_u("RestoreGroup(groupId: %d, "), groupId); |
| origGroupInfo.Print(w, input); |
| w->PrintEOL(_u(")")); |
| return sizeof(*this); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ResetGroupCont |
| // ---------------------------------------------------------------------- |
| |
| inline bool ResetGroupCont::Exec(REGEX_CONT_EXEC_PARAMETERS) |
| { |
| matcher.ResetGroup(groupId); |
| return false; // KEEP BACKTRACKING |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int ResetGroupCont::Print(DebugWriter* w, const Char* const input) const |
| { |
| w->PrintEOL(_u("ResetGroup(groupId: %d)"), groupId); |
| return sizeof(*this); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // ResetGroupRangeCont |
| // ---------------------------------------------------------------------- |
| |
| inline bool ResetGroupRangeCont::Exec(REGEX_CONT_EXEC_PARAMETERS) |
| { |
| matcher.ResetInnerGroups(fromGroupId, toGroupId); |
| return false; // KEEP BACKTRACKING |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int ResetGroupRangeCont::Print(DebugWriter* w, const Char* const input) const |
| { |
| w->PrintEOL(_u("ResetGroupRange(fromGroupId: %d, toGroupId: %d)"), fromGroupId, toGroupId); |
| return sizeof(*this); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RepeatLoopCont |
| // ---------------------------------------------------------------------- |
| |
| inline bool RepeatLoopCont::Exec(REGEX_CONT_EXEC_PARAMETERS) |
| { |
| matcher.QueryContinue(qcTicks); |
| |
| // Try one more iteration of a non-greedy loop |
| BeginLoopInst* begin = matcher.L2I(BeginLoop, beginLabel); |
| LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId); |
| loopInfo->startInputOffset = inputOffset = origInputOffset; |
| instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(BeginLoopInst)); |
| if(begin->hasInnerNondet) |
| { |
| // If it backtracks into the loop body of an earlier iteration, it must restore inner groups for that iteration. |
| // Save the inner groups and reset them for the next iteration. |
| matcher.SaveInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId, true, input, contStack); |
| } |
| else |
| { |
| // If it backtracks, the entire loop will fail, so no need to restore groups. Just reset the inner groups for |
| // the next iteration. |
| matcher.ResetInnerGroups(begin->minBodyGroupId, begin->maxBodyGroupId); |
| } |
| return true; // STOP BACKTRACKING |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int RepeatLoopCont::Print(DebugWriter* w, const Char* const input) const |
| { |
| w->PrintEOL(_u("RepeatLoop(beginLabel: L%04x, origInputOffset: %u)"), beginLabel, origInputOffset); |
| return sizeof(*this); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // PopAssertionCont |
| // ---------------------------------------------------------------------- |
| |
| inline bool PopAssertionCont::Exec(REGEX_CONT_EXEC_PARAMETERS) |
| { |
| Assert(!assertionStack.IsEmpty()); |
| if (matcher.PopAssertion(inputOffset, instPointer, contStack, assertionStack, false)) |
| { |
| // Body of negative assertion failed |
| return true; // STOP BACKTRACKING |
| } |
| else |
| { |
| // Body of positive assertion failed |
| return false; // CONTINUE BACKTRACKING |
| } |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int PopAssertionCont::Print(DebugWriter* w, const Char* const input) const |
| { |
| w->PrintEOL(_u("PopAssertion()")); |
| return sizeof(*this); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RewindLoopFixedCont |
| // ---------------------------------------------------------------------- |
| |
| inline bool RewindLoopFixedCont::Exec(REGEX_CONT_EXEC_PARAMETERS) |
| { |
| matcher.QueryContinue(qcTicks); |
| |
| BeginLoopFixedInst* begin = matcher.L2I(BeginLoopFixed, beginLabel); |
| LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId); |
| |
| if (tryingBody) |
| { |
| tryingBody = false; |
| // loopInfo->number is the number of iterations completed before trying body |
| Assert(loopInfo->number >= begin->repeats.lower); |
| } |
| else |
| { |
| // loopInfo->number is the number of iterations completed before trying follow |
| Assert(loopInfo->number > begin->repeats.lower); |
| // Try follow with one fewer iteration |
| loopInfo->number--; |
| } |
| |
| // Rewind input |
| inputOffset = loopInfo->startInputOffset + loopInfo->number * begin->length; |
| |
| if (loopInfo->number > begin->repeats.lower) |
| { |
| // Un-pop the continuation ready for next time |
| contStack.UnPop<RewindLoopFixedCont>(); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.UnPopStats(contStack, input); |
| #endif |
| } |
| // else: Can't try any fewer iterations if follow fails, so leave continuation as popped and let failure propagate |
| |
| instPointer = matcher.LabelToInstPointer(begin->exitLabel); |
| return true; // STOP BACKTRACKING |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int RewindLoopFixedCont::Print(DebugWriter* w, const Char* const input) const |
| { |
| w->PrintEOL(_u("RewindLoopFixed(beginLabel: L%04x, tryingBody: %s)"), beginLabel, tryingBody ? _u("true") : _u("false")); |
| return sizeof(*this); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RewindLoopSetCont |
| // ---------------------------------------------------------------------- |
| |
| inline bool RewindLoopSetCont::Exec(REGEX_CONT_EXEC_PARAMETERS) |
| { |
| matcher.QueryContinue(qcTicks); |
| |
| LoopSetInst* begin = matcher.L2I(LoopSet, beginLabel); |
| LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId); |
| |
| // loopInfo->number is the number of iterations completed before trying follow |
| Assert(loopInfo->number > begin->repeats.lower); |
| // Try follow with fewer iterations |
| loopInfo->number--; |
| |
| // Rewind input |
| inputOffset = loopInfo->startInputOffset + loopInfo->number; |
| |
| if (loopInfo->number > begin->repeats.lower) |
| { |
| // Un-pop the continuation ready for next time |
| contStack.UnPop<RewindLoopSetCont>(); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.UnPopStats(contStack, input); |
| #endif |
| } |
| // else: Can't try any fewer iterations if follow fails, so leave continuation as popped and let failure propagate |
| |
| instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(LoopSetInst)); |
| return true; // STOP BACKTRACKING |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int RewindLoopSetCont::Print(DebugWriter* w, const Char* const input) const |
| { |
| w->PrintEOL(_u("RewindLoopSet(beginLabel: L%04x)"), beginLabel); |
| return sizeof(*this); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RewindLoopSetWithFollowFirstCont |
| // ---------------------------------------------------------------------- |
| |
| inline bool RewindLoopSetWithFollowFirstCont::Exec(REGEX_CONT_EXEC_PARAMETERS) |
| { |
| matcher.QueryContinue(qcTicks); |
| |
| LoopSetWithFollowFirstInst* begin = matcher.L2I(LoopSetWithFollowFirst, beginLabel); |
| LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId); |
| |
| // loopInfo->number is the number of iterations completed before trying follow |
| Assert(loopInfo->number > begin->repeats.lower); |
| // Try follow with fewer iterations |
| |
| if (loopInfo->offsetsOfFollowFirst == nullptr) |
| { |
| if (begin->followFirst != MaxUChar) |
| { |
| // We determined the first character in the follow set at compile time, |
| // but didn't find a single match for it in the last iteration of the loop. |
| // So, there is no benefit in backtracking. |
| loopInfo->number = begin->repeats.lower; // stop backtracking |
| } |
| else |
| { |
| // We couldn't determine the first character in the follow set at compile time; |
| // fall back to backtracking by one character at a time. |
| loopInfo->number--; |
| } |
| } |
| else |
| { |
| if (loopInfo->offsetsOfFollowFirst->Empty()) |
| { |
| // We have already backtracked to the first offset where we matched the LoopSet's followFirst; |
| // no point in backtracking more. |
| loopInfo->number = begin->repeats.lower; // stop backtracking |
| } |
| else |
| { |
| // Backtrack to the previous offset where we matched the LoopSet's followFirst |
| // We will be doing one unnecessary match. But, if we wanted to avoid it, we'd have |
| // to propagate to the next Inst, that the first character is already matched. |
| // Seems like an overkill to avoid one match. |
| loopInfo->number = loopInfo->offsetsOfFollowFirst->RemoveAtEnd(); |
| } |
| } |
| |
| // If loopInfo->number now is less than begins->repeats.lower, the loop |
| // shouldn't match anything. In that case, stop backtracking. |
| loopInfo->number = max(loopInfo->number, begin->repeats.lower); |
| // Rewind input |
| inputOffset = loopInfo->startInputOffset + loopInfo->number; |
| |
| if (loopInfo->number > begin->repeats.lower) |
| { |
| // Un-pop the continuation ready for next time |
| contStack.UnPop<RewindLoopSetWithFollowFirstCont>(); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.UnPopStats(contStack, input); |
| #endif |
| } |
| // else: Can't try any fewer iterations if follow fails, so leave continuation as popped and let failure propagate |
| |
| instPointer = matcher.LabelToInstPointer(beginLabel + sizeof(LoopSetWithFollowFirstInst)); |
| return true; // STOP BACKTRACKING |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int RewindLoopSetWithFollowFirstCont::Print(DebugWriter* w, const Char* const input) const |
| { |
| w->PrintEOL(_u("RewindLoopSetWithFollowFirst(beginLabel: L%04x)"), beginLabel); |
| return sizeof(*this); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RewindLoopFixedGroupLastIterationCont |
| // ---------------------------------------------------------------------- |
| |
| inline bool RewindLoopFixedGroupLastIterationCont::Exec(REGEX_CONT_EXEC_PARAMETERS) |
| { |
| matcher.QueryContinue(qcTicks); |
| |
| BeginLoopFixedGroupLastIterationInst* begin = matcher.L2I(BeginLoopFixedGroupLastIteration, beginLabel); |
| LoopInfo* loopInfo = matcher.LoopIdToLoopInfo(begin->loopId); |
| GroupInfo* groupInfo = matcher.GroupIdToGroupInfo(begin->groupId); |
| |
| if (tryingBody) |
| { |
| tryingBody = false; |
| // loopInfo->number is the number of iterations completed before current attempt of body |
| Assert(loopInfo->number >= begin->repeats.lower); |
| } |
| else |
| { |
| // loopInfo->number is the number of iterations completed before trying follow |
| Assert(loopInfo->number > begin->repeats.lower); |
| // Try follow with one fewer iteration |
| loopInfo->number--; |
| } |
| |
| // Rewind input |
| inputOffset = loopInfo->startInputOffset + loopInfo->number * begin->length; |
| |
| if (loopInfo->number > 0) |
| { |
| // Bind previous iteration's body |
| groupInfo->offset = inputOffset - begin->length; |
| groupInfo->length = begin->length; |
| } |
| else |
| { |
| groupInfo->Reset(); |
| } |
| |
| if (loopInfo->number > begin->repeats.lower) |
| { |
| // Un-pop the continuation ready for next time |
| contStack.UnPop<RewindLoopFixedGroupLastIterationCont>(); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| matcher.UnPopStats(contStack, input); |
| #endif |
| } |
| // else: Can't try any fewer iterations if follow fails, so leave continuation as popped and let failure propagate |
| |
| instPointer = matcher.LabelToInstPointer(begin->exitLabel); |
| return true; // STOP BACKTRACKING |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| int RewindLoopFixedGroupLastIterationCont::Print(DebugWriter* w, const Char* const input) const |
| { |
| w->PrintEOL(_u("RewindLoopFixedGroupLastIteration(beginLabel: L%04x, tryingBody: %s)"), beginLabel, tryingBody ? _u("true") : _u("false")); |
| return sizeof(*this); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // Matcher |
| // ---------------------------------------------------------------------- |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void ContStack::Print(DebugWriter* w, const Char* const input) const |
| { |
| for (Iterator it(*this); it; ++it) |
| { |
| w->Print(_u("%4llu: "), static_cast<unsigned long long>(it.Position())); |
| it->Print(w, input); |
| } |
| } |
| #endif |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void AssertionStack::Print(DebugWriter* w, const Matcher* matcher) const |
| { |
| for (Iterator it(*this); it; ++it) |
| { |
| it->Print(w); |
| } |
| } |
| #endif |
| |
| Matcher::Matcher(Js::ScriptContext* scriptContext, RegexPattern* pattern) |
| : pattern(pattern) |
| , standardChars(scriptContext->GetThreadContext()->GetStandardChars((char16*)0)) |
| , program(pattern->rep.unified.program) |
| , groupInfos(nullptr) |
| , loopInfos(nullptr) |
| , literalNextSyncInputOffsets(nullptr) |
| , recycler(scriptContext->GetRecycler()) |
| , previousQcTime(0) |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| , stats(0) |
| , w(0) |
| #endif |
| { |
| // Don't need to zero out - the constructor for GroupInfo should take care of it |
| groupInfos = RecyclerNewArrayLeaf(recycler, GroupInfo, program->numGroups); |
| |
| if (program->numLoops > 0) |
| { |
| loopInfos = RecyclerNewArrayLeafZ(recycler, LoopInfo, program->numLoops); |
| } |
| } |
| |
| Matcher *Matcher::New(Js::ScriptContext* scriptContext, RegexPattern* pattern) |
| { |
| return RecyclerNew(scriptContext->GetRecycler(), Matcher, scriptContext, pattern); |
| } |
| |
| Matcher *Matcher::CloneToScriptContext(Js::ScriptContext *scriptContext, RegexPattern *pattern) |
| { |
| Matcher *result = New(scriptContext, pattern); |
| if (groupInfos) |
| { |
| size_t size = program->numGroups * sizeof(GroupInfo); |
| js_memcpy_s(result->groupInfos, size, groupInfos, size); |
| } |
| if (loopInfos) |
| { |
| size_t size = program->numLoops * sizeof(LoopInfo); |
| js_memcpy_s(result->loopInfos, size, loopInfos, size); |
| } |
| |
| return result; |
| } |
| |
#if DBG
// Debug-only table holding one entry per continuation tag, generated from RegexContcodes.h.
const Cont::ContTag contTags[] = {
#define M(O) Cont::ContTag::O,
#include "RegexContcodes.h"
#undef M
};

// First and last entries of the table; RunContStack asserts that a popped continuation's tag
// lies within this range.
const Cont::ContTag minContTag = contTags[0];
const Cont::ContTag maxContTag = contTags[sizeof(contTags) / sizeof(contTags[0]) - 1];
#endif
| |
| void Matcher::DoQueryContinue(const uint qcTicks) |
| { |
| // See definition of TimePerQc for description of regex QC heuristics |
| |
| const uint before = previousQcTime; |
| const uint now = GetTickCount(); |
| if ((!before || now - before < TimePerQc) && qcTicks & TicksPerQc - 1) |
| { |
| return; |
| } |
| |
| previousQcTime = now; |
| TraceQueryContinue(now); |
| |
| // Query-continue can be reentrant and run the same regex again. To prevent the matcher and other persistent objects |
| // from being reused reentrantly, save and restore them around the QC call. |
| class AutoCleanup |
| { |
| private: |
| RegexPattern *const pattern; |
| Matcher *const matcher; |
| RegexStacks * regexStacks; |
| |
| public: |
| AutoCleanup(RegexPattern *const pattern, Matcher *const matcher) : pattern(pattern), matcher(matcher) |
| { |
| Assert(pattern); |
| Assert(matcher); |
| Assert(pattern->rep.unified.matcher == matcher); |
| |
| pattern->rep.unified.matcher = nullptr; |
| |
| const auto scriptContext = pattern->GetScriptContext(); |
| regexStacks = scriptContext->SaveRegexStacks(); |
| } |
| |
| ~AutoCleanup() |
| { |
| pattern->rep.unified.matcher = matcher; |
| |
| const auto scriptContext = pattern->GetScriptContext(); |
| scriptContext->RestoreRegexStacks(regexStacks); |
| } |
| } autoCleanup(pattern, this); |
| |
| pattern->GetScriptContext()->GetThreadContext()->CheckScriptInterrupt(); |
| } |
| |
| void Matcher::TraceQueryContinue(const uint now) |
| { |
| if (!PHASE_TRACE1(Js::RegexQcPhase)) |
| { |
| return; |
| } |
| |
| Output::Print(_u("Regex QC")); |
| |
| static uint n = 0; |
| static uint firstQcTime = 0; |
| |
| ++n; |
| if (firstQcTime) |
| { |
| Output::Print(_u(" - frequency: %0.1f"), static_cast<double>(n * 1000) / (now - firstQcTime)); |
| } |
| else |
| { |
| firstQcTime = now; |
| } |
| |
| Output::Print(_u("\n")); |
| Output::Flush(); |
| } |
| |
| bool Matcher::Fail(const Char* const input, CharCount &inputOffset, const uint8 *&instPointer, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks) |
| { |
| if (!contStack.IsEmpty()) |
| { |
| if (!RunContStack(input, inputOffset, instPointer, contStack, assertionStack, qcTicks)) |
| { |
| return false; |
| } |
| } |
| |
| Assert(assertionStack.IsEmpty()); |
| groupInfos[0].Reset(); |
| return true; // STOP EXECUTION |
| } |
| |
| inline bool Matcher::RunContStack(const Char* const input, CharCount &inputOffset, const uint8 *&instPointer, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks) |
| { |
| while (true) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| PopStats(contStack, input); |
| #endif |
| Cont* cont = contStack.Pop(); |
| if (cont == 0) |
| { |
| break; |
| } |
| |
| Assert(cont->tag >= minContTag && cont->tag <= maxContTag); |
| // All these cases RESUME EXECUTION if backtracking finds a stop point |
| const Cont::ContTag tag = cont->tag; |
| switch (tag) |
| { |
| #define M(O) case Cont::ContTag::O: if (((O##Cont*)cont)->Exec(*this, input, inputOffset, instPointer, contStack, assertionStack, qcTicks)) return false; break; |
| #include "RegexContcodes.h" |
| #undef M |
| default: |
| Assert(false); // should never be reached |
| return false; // however, can't use complier optimization if we wnat to return false here |
| } |
| } |
| return true; |
| } |
| |
#if DBG
// Debug-only table holding one entry per instruction tag, generated from RegexOpCodes.h.
// Template opcodes contribute their tag name just like plain opcodes.
const Inst::InstTag instTags[] = {
#define M(TagName) Inst::InstTag::TagName,
#define MTemplate(TagName, ...) M(TagName)
#include "RegexOpCodes.h"
#undef M
#undef MTemplate
};

// First and last entries of the table; Run asserts that the current instruction's tag lies
// within this range.
const Inst::InstTag minInstTag = instTags[0];
const Inst::InstTag maxInstTag = instTags[sizeof(instTags) / sizeof(instTags[0]) - 1];
#endif
| |
| inline void Matcher::Run(const Char* const input, const CharCount inputLength, CharCount &matchStart, CharCount &nextSyncInputOffset, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks, bool firstIteration) |
| { |
| CharCount inputOffset = matchStart; |
| const uint8 *instPointer = program->rep.insts.insts; |
| Assert(instPointer != 0); |
| |
| while (true) |
| { |
| Assert(inputOffset >= matchStart && inputOffset <= inputLength); |
| Assert(instPointer >= program->rep.insts.insts && instPointer < program->rep.insts.insts + program->rep.insts.instsLen); |
| Assert(((Inst*)instPointer)->tag >= minInstTag && ((Inst*)instPointer)->tag <= maxInstTag); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| if (w != 0) |
| { |
| Print(w, input, inputLength, inputOffset, instPointer, contStack, assertionStack); |
| } |
| InstStats(); |
| #endif |
| const Inst *inst = (const Inst*)instPointer; |
| const Inst::InstTag tag = inst->tag; |
| switch (tag) |
| { |
| #define MBase(TagName, ClassName) \ |
| case Inst::InstTag::TagName: \ |
| if (((const ClassName *)inst)->Exec(*this, input, inputLength, matchStart, inputOffset, nextSyncInputOffset, instPointer, contStack, assertionStack, qcTicks, firstIteration)) { return; } \ |
| break; |
| #define M(TagName) MBase(TagName, TagName##Inst) |
| #define MTemplate(TagName, TemplateDeclaration, GenericClassName, SpecializedClassName) MBase(TagName, SpecializedClassName) |
| #include "RegexOpCodes.h" |
| #undef MBase |
| #undef M |
| #undef MTemplate |
| default: |
| Assert(false); |
| __assume(false); |
| } |
| } |
| } |
| |
#if DBG
// Debug-only: resets the bookkeeping of every loop in the program.
void Matcher::ResetLoopInfos()
{
    int loopId = 0;
    while (loopId < program->numLoops)
    {
        loopInfos[loopId].Reset();
        loopId++;
    }
}
#endif
| |
| inline bool Matcher::MatchHere(const Char* const input, const CharCount inputLength, CharCount &matchStart, CharCount &nextSyncInputOffset, ContStack &contStack, AssertionStack &assertionStack, uint &qcTicks, bool firstIteration) |
| { |
| // Reset the continuation and assertion stacks ready for fresh run |
| // NOTE: We used to do this after the Run, but it's safer to do it here in case unusual control flow exits |
| // the matcher without executing the clears. |
| contStack.Clear(); |
| // assertionStack may be non-empty since we can hard fail directly out of matcher without popping assertion |
| assertionStack.Clear(); |
| |
| Assert(contStack.IsEmpty()); |
| Assert(assertionStack.IsEmpty()); |
| |
| ResetInnerGroups(0, program->numGroups - 1); |
| #if DBG |
| ResetLoopInfos(); |
| #endif |
| |
| Run(input, inputLength, matchStart, nextSyncInputOffset, contStack, assertionStack, qcTicks, firstIteration); |
| // Leave the continuation and assertion stack memory in place so we don't have to alloc next time |
| |
| return WasLastMatchSuccessful(); |
| } |
| |
| inline bool Matcher::MatchSingleCharCaseInsensitive(const Char* const input, const CharCount inputLength, CharCount offset, const Char c) |
| { |
| CaseInsensitive::MappingSource mappingSource = program->GetCaseMappingSource(); |
| |
| // If sticky flag is present, break since the 1st character didn't match the pattern character |
| if ((program->flags & StickyRegexFlag) != 0) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| CompStats(); |
| #endif |
| if (MatchSingleCharCaseInsensitiveHere(mappingSource, input, offset, c)) |
| { |
| GroupInfo* const info = GroupIdToGroupInfo(0); |
| info->offset = offset; |
| info->length = 1; |
| return true; |
| } |
| else |
| { |
| ResetGroup(0); |
| return false; |
| } |
| } |
| |
| while (offset < inputLength) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| CompStats(); |
| #endif |
| if (MatchSingleCharCaseInsensitiveHere(mappingSource, input, offset, c)) |
| { |
| GroupInfo* const info = GroupIdToGroupInfo(0); |
| info->offset = offset; |
| info->length = 1; |
| return true; |
| } |
| offset++; |
| } |
| |
| ResetGroup(0); |
| return false; |
| } |
| |
| inline bool Matcher::MatchSingleCharCaseInsensitiveHere( |
| CaseInsensitive::MappingSource mappingSource, |
| const Char* const input, |
| const CharCount offset, |
| const Char c) |
| { |
| return (standardChars->ToCanonical(mappingSource, input[offset]) == standardChars->ToCanonical(mappingSource, c)); |
| } |
| |
| inline bool Matcher::MatchSingleCharCaseSensitive(const Char* const input, const CharCount inputLength, CharCount offset, const Char c) |
| { |
| // If sticky flag is present, break since the 1st character didn't match the pattern character |
| if ((program->flags & StickyRegexFlag) != 0) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| CompStats(); |
| #endif |
| if (input[offset] == c) |
| { |
| GroupInfo* const info = GroupIdToGroupInfo(0); |
| info->offset = offset; |
| info->length = 1; |
| return true; |
| } |
| else |
| { |
| ResetGroup(0); |
| return false; |
| } |
| } |
| |
| while (offset < inputLength) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| CompStats(); |
| #endif |
| if (input[offset] == c) |
| { |
| GroupInfo* const info = GroupIdToGroupInfo(0); |
| info->offset = offset; |
| info->length = 1; |
| return true; |
| } |
| offset++; |
| } |
| |
| ResetGroup(0); |
| return false; |
| } |
| |
| inline bool Matcher::MatchBoundedWord(const Char* const input, const CharCount inputLength, CharCount offset) |
| { |
| const StandardChars<Char>& stdchrs = *standardChars; |
| |
| if (offset >= inputLength) |
| { |
| ResetGroup(0); |
| return false; |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| CompStats(); |
| #endif |
| |
| if ((offset == 0 && stdchrs.IsWord(input[0])) || |
| (offset > 0 && (!stdchrs.IsWord(input[offset - 1]) && stdchrs.IsWord(input[offset])))) |
| { |
| // Already at start of word |
| } |
| // If sticky flag is present, return false since we are not at the beginning of the word yet |
| else if ((program->flags & StickyRegexFlag) == StickyRegexFlag) |
| { |
| ResetGroup(0); |
| return false; |
| } |
| else |
| { |
| if (stdchrs.IsWord(input[offset])) |
| { |
| // Scan for end of current word |
| while (true) |
| { |
| offset++; |
| if (offset >= inputLength) |
| { |
| ResetGroup(0); |
| return false; |
| } |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| CompStats(); |
| #endif |
| if (!stdchrs.IsWord(input[offset])) |
| { |
| break; |
| } |
| } |
| } |
| |
| // Scan for start of next word |
| while (true) |
| { |
| offset++; |
| if (offset >= inputLength) |
| { |
| ResetGroup(0); |
| return false; |
| } |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| CompStats(); |
| #endif |
| if (stdchrs.IsWord(input[offset])) |
| { |
| break; |
| } |
| } |
| } |
| |
| GroupInfo* const info = GroupIdToGroupInfo(0); |
| info->offset = offset; |
| |
| // Scan for end of word |
| do |
| { |
| offset++; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| CompStats(); |
| #endif |
| } |
| while (offset < inputLength && stdchrs.IsWord(input[offset])); |
| |
| info->length = offset - info->offset; |
| return true; |
| } |
| |
| inline bool Matcher::MatchLeadingTrailingSpaces(const Char* const input, const CharCount inputLength, CharCount offset) |
| { |
| GroupInfo* const info = GroupIdToGroupInfo(0); |
| Assert(offset <= inputLength); |
| Assert((program->flags & MultilineRegexFlag) == 0); |
| |
| if (offset >= inputLength) |
| { |
| Assert(offset == inputLength); |
| if (program->rep.leadingTrailingSpaces.endMinMatch == 0 || |
| (offset == 0 && program->rep.leadingTrailingSpaces.beginMinMatch == 0)) |
| { |
| info->offset = offset; |
| info->length = 0; |
| return true; |
| } |
| info->Reset(); |
| return false; |
| } |
| |
| const StandardChars<Char> &stdchrs = *standardChars; |
| if (offset == 0) |
| { |
| while (offset < inputLength && stdchrs.IsWhitespaceOrNewline(input[offset])) |
| { |
| offset++; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| CompStats(); |
| #endif |
| } |
| if (offset >= program->rep.leadingTrailingSpaces.beginMinMatch) |
| { |
| info->offset = 0; |
| info->length = offset; |
| return true; |
| } |
| } |
| |
| Assert(inputLength > 0); |
| const CharCount initOffset = offset; |
| offset = inputLength - 1; |
| while (offset >= initOffset && stdchrs.IsWhitespaceOrNewline(input[offset])) |
| { |
| // This can never underflow since initOffset > 0 |
| Assert(offset > 0); |
| offset--; |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| CompStats(); |
| #endif |
| } |
| offset++; |
| CharCount length = inputLength - offset; |
| if (length >= program->rep.leadingTrailingSpaces.endMinMatch) |
| { |
| info->offset = offset; |
| info->length = length; |
| return true; |
| } |
| info->Reset(); |
| return false; |
| } |
| |
| inline bool Matcher::MatchOctoquad(const Char* const input, const CharCount inputLength, CharCount offset, OctoquadMatcher* matcher) |
| { |
| if (matcher->Match |
| ( input |
| , inputLength |
| , offset |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| , stats |
| #endif |
| )) |
| { |
| GroupInfo* const info = GroupIdToGroupInfo(0); |
| info->offset = offset; |
| info->length = TrigramInfo::PatternLength; |
| return true; |
| } |
| else |
| { |
| ResetGroup(0); |
| return false; |
| } |
| } |
| |
| inline bool Matcher::MatchBOILiteral2(const Char* const input, const CharCount inputLength, CharCount offset, DWORD literal2) |
| { |
| if (offset == 0 && inputLength >= 2) |
| { |
| CompileAssert(sizeof(Char) == 2); |
| const Program * program = this->program; |
| if (program->rep.boiLiteral2.literal == *(DWORD *)input) |
| { |
| GroupInfo* const info = GroupIdToGroupInfo(0); |
| info->offset = 0; |
| info->length = 2; |
| return true; |
| } |
| } |
| ResetGroup(0); |
| return false; |
| } |
| |
| bool Matcher::Match |
| ( const Char* const input |
| , const CharCount inputLength |
| , CharCount offset |
| , Js::ScriptContext * scriptContext |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| , RegexStats* stats |
| , DebugWriter* w |
| #endif |
| ) |
| { |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| this->stats = stats; |
| this->w = w; |
| #endif |
| |
| Assert(offset <= inputLength); |
| bool res; |
| bool loopMatchHere = true; |
| Program const *prog = this->program; |
| bool isStickyPresent = this->pattern->IsSticky(); |
| switch (prog->tag) |
| { |
| case Program::ProgramTag::BOIInstructionsTag: |
| if (offset != 0) |
| { |
| groupInfos[0].Reset(); |
| res = false; |
| break; |
| } |
| |
| // fall through |
| |
| case Program::ProgramTag::BOIInstructionsForStickyFlagTag: |
| AssertMsg(prog->tag == Program::ProgramTag::BOIInstructionsTag || isStickyPresent, "prog->tag should be BOIInstructionsForStickyFlagTag if sticky = true."); |
| |
| loopMatchHere = false; |
| |
| // fall through |
| |
| case Program::ProgramTag::InstructionsTag: |
| { |
| previousQcTime = 0; |
| uint qcTicks = 0; |
| |
| // This is the next offset in the input from where we will try to sync. For sync instructions that back up, this |
| // is used to avoid trying to sync when we have not yet reached the offset in the input we last synced to before |
| // backing up. |
| CharCount nextSyncInputOffset = offset; |
| |
| RegexStacks * regexStacks = scriptContext->RegexStacks(); |
| |
| // Need to continue matching even if matchStart == inputLim since some patterns may match an empty string at the end |
| // of the input. For instance: /a*$/.exec("b") |
| bool firstIteration = true; |
| do |
| { |
| // Let there be only one call to MatchHere(), as that call expands the interpreter loop in-place. Having |
| // multiple calls to MatchHere() would bloat the code. |
| res = MatchHere(input, inputLength, offset, nextSyncInputOffset, regexStacks->contStack, regexStacks->assertionStack, qcTicks, firstIteration); |
| firstIteration = false; |
| } while(!res && loopMatchHere && ++offset <= inputLength); |
| |
| break; |
| } |
| |
| case Program::ProgramTag::SingleCharTag: |
| if (this->pattern->IsIgnoreCase()) |
| { |
| res = MatchSingleCharCaseInsensitive(input, inputLength, offset, prog->rep.singleChar.c); |
| } |
| else |
| { |
| res = MatchSingleCharCaseSensitive(input, inputLength, offset, prog->rep.singleChar.c); |
| } |
| |
| break; |
| |
| case Program::ProgramTag::BoundedWordTag: |
| res = MatchBoundedWord(input, inputLength, offset); |
| break; |
| |
| case Program::ProgramTag::LeadingTrailingSpacesTag: |
| res = MatchLeadingTrailingSpaces(input, inputLength, offset); |
| break; |
| |
| case Program::ProgramTag::OctoquadTag: |
| res = MatchOctoquad(input, inputLength, offset, prog->rep.octoquad.matcher); |
| break; |
| |
| case Program::ProgramTag::BOILiteral2Tag: |
| res = MatchBOILiteral2(input, inputLength, offset, prog->rep.boiLiteral2.literal); |
| break; |
| |
| default: |
| Assert(false); |
| __assume(false); |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| this->stats = 0; |
| this->w = 0; |
| #endif |
| |
| return res; |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of the matcher state: the pattern source, the input with the current position
// marked by <<< >>>, and - for instruction programs - the current instruction, the group and
// loop tables and both stacks.
void Matcher::Print(DebugWriter* w, const Char* const input, const CharCount inputLength, CharCount inputOffset, const uint8* instPointer, ContStack &contStack, AssertionStack &assertionStack) const
{
    w->PrintEOL(_u("Matcher {"));
    w->Indent();

    w->Print(_u("program:      "));
    w->PrintQuotedString(program->source, program->sourceLen);
    w->EOL();

    w->Print(_u("inputPointer: "));
    if (inputLength == 0)
    {
        w->PrintEOL(_u("<empty input>"));
    }
    else if (inputLength > 1024)
    {
        // Inputs longer than 1024 characters are not echoed
        w->PrintEOL(_u("<string too large>"));
    }
    else
    {
        // Text before the current position, the current character in markers, then the rest
        w->PrintEscapedString(input, inputOffset);
        if (inputOffset < inputLength)
        {
            w->Print(_u("<<<"));
            w->PrintEscapedChar(input[inputOffset]);
            w->Print(_u(">>>"));
            w->PrintEscapedString(input + inputOffset + 1, inputLength - inputOffset - 1);
        }
        else
        {
            // Positioned at the end of the input: there is no current character
            w->Print(_u("<<<>>>"));
        }
        w->EOL();
    }

    // NOTE(review): BOIInstructionsForStickyFlagTag programs are also run through the interpreter
    // (see Match) but are not covered by this condition - confirm whether that is intentional.
    const bool isInstructionProgram =
        program->tag == Program::ProgramTag::BOIInstructionsTag ||
        program->tag == Program::ProgramTag::InstructionsTag;
    if (isInstructionProgram)
    {
        w->Print(_u("instPointer:  "));

        const Inst* const current = reinterpret_cast<const Inst*>(instPointer);
        switch (current->tag)
        {
#define MBase(TagName, ClassName) \
        case Inst::InstTag::TagName: \
        { \
            const ClassName *actualInst = static_cast<const ClassName *>(current); \
            actualInst->Print(w, InstPointerToLabel(instPointer), program->rep.insts.litbuf); \
            break; \
        }
#define M(TagName) MBase(TagName, TagName##Inst)
#define MTemplate(TagName, TemplateDeclaration, GenericClassName, SpecializedClassName) MBase(TagName, SpecializedClassName)
#include "RegexOpCodes.h"
#undef MBase
#undef M
#undef MTemplate
        default:
            Assert(false);
            __assume(false);
        }

        w->PrintEOL(_u("groups:"));
        w->Indent();
        for (int groupId = 0; groupId < program->numGroups; groupId++)
        {
            w->Print(_u("%d: "), groupId);
            groupInfos[groupId].Print(w, input);
            w->EOL();
        }
        w->Unindent();

        w->PrintEOL(_u("loops:"));
        w->Indent();
        for (int loopId = 0; loopId < program->numLoops; loopId++)
        {
            w->Print(_u("%d: "), loopId);
            loopInfos[loopId].Print(w);
            w->EOL();
        }
        w->Unindent();

        w->PrintEOL(_u("contStack: (top to bottom)"));
        w->Indent();
        contStack.Print(w, input);
        w->Unindent();

        w->PrintEOL(_u("assertionStack: (top to bottom)"));
        w->Indent();
        assertionStack.Print(w, this);
        w->Unindent();
    }

    w->Unindent();
    w->PrintEOL(_u("}"));
    w->Flush();
}
#endif
| |
| // ---------------------------------------------------------------------- |
| // Program |
| // ---------------------------------------------------------------------- |
| |
| Program::Program(RegexFlags flags) |
| : source(nullptr) |
| , sourceLen(0) |
| , flags(flags) |
| , numGroups(0) |
| , numLoops(0) |
| { |
| tag = ProgramTag::InstructionsTag; |
| rep.insts.insts = nullptr; |
| rep.insts.instsLen = 0; |
| rep.insts.litbuf = nullptr; |
| rep.insts.litbufLen = 0; |
| rep.insts.scannersForSyncToLiterals = nullptr; |
| } |
| |
| Program *Program::New(Recycler *recycler, RegexFlags flags) |
| { |
| return RecyclerNew(recycler, Program, flags); |
| } |
| |
| Field(ScannerInfo *)*Program::CreateScannerArrayForSyncToLiterals(Recycler *const recycler) |
| { |
| Assert(tag == ProgramTag::InstructionsTag); |
| Assert(!rep.insts.scannersForSyncToLiterals); |
| Assert(recycler); |
| |
| return |
| rep.insts.scannersForSyncToLiterals = |
| RecyclerNewArrayZ(recycler, Field(ScannerInfo *), ScannersMixin::MaxNumSyncLiterals); |
| } |
| |
| ScannerInfo *Program::AddScannerForSyncToLiterals( |
| Recycler *const recycler, |
| const int scannerIndex, |
| const CharCount offset, |
| const CharCount length, |
| const bool isEquivClass) |
| { |
| Assert(tag == ProgramTag::InstructionsTag); |
| Assert(rep.insts.scannersForSyncToLiterals); |
| Assert(recycler); |
| Assert(scannerIndex >= 0); |
| Assert(scannerIndex < ScannersMixin::MaxNumSyncLiterals); |
| Assert(!rep.insts.scannersForSyncToLiterals[scannerIndex]); |
| |
| return |
| rep.insts.scannersForSyncToLiterals[scannerIndex] = |
| RecyclerNewLeaf(recycler, ScannerInfo, offset, length, isEquivClass); |
| } |
| |
| void Program::FreeBody(ArenaAllocator* rtAllocator) |
| { |
| if (tag != ProgramTag::InstructionsTag || !rep.insts.insts) |
| { |
| return; |
| } |
| |
| Inst *inst = reinterpret_cast<Inst *>(PointerValue(rep.insts.insts)); |
| const auto instEnd = reinterpret_cast<Inst *>(reinterpret_cast<uint8 *>(inst) + rep.insts.instsLen); |
| Assert(inst < instEnd); |
| do |
| { |
| switch(inst->tag) |
| { |
| #define MBase(TagName, ClassName) \ |
| case Inst::InstTag::TagName: \ |
| { \ |
| const auto actualInst = static_cast<ClassName *>(inst); \ |
| actualInst->FreeBody(rtAllocator); \ |
| inst = actualInst + 1; \ |
| break; \ |
| } |
| #define M(TagName) MBase(TagName, TagName##Inst) |
| #define MTemplate(TagName, TemplateDeclaration, GenericClassName, SpecializedClassName) MBase(TagName, SpecializedClassName) |
| #include "RegexOpCodes.h" |
| #undef MBase |
| #undef M |
| #undef MTemplate |
| default: |
| Assert(false); |
| __assume(false); |
| } |
| } while(inst < instEnd); |
| Assert(inst == instEnd); |
| |
| #if DBG |
| rep.insts.insts = nullptr; |
| rep.insts.instsLen = 0; |
| #endif |
| } |
| |
#if ENABLE_REGEX_CONFIG_OPTIONS
// Debug dump of the compiled program: source text, literal buffer, flags, group/loop counts and
// then either the instruction listing or a description of the special form.
// In baseline mode instructions are labelled with a running index instead of their byte offset.
void Program::Print(DebugWriter* w)
{
    const bool isBaselineMode = Js::Configuration::Global.flags.BaselineMode;
    w->PrintEOL(_u("Program {"));
    w->Indent();
    w->PrintEOL(_u("source:       %s"), PointerValue(source));

    w->Print(_u("litbuf:       "));
    const char16 *litbuf = this->rep.insts.litbuf;
    size_t litbufLen = 0;
    if (litbuf == nullptr)
    {
        w->PrintEOL(_u("<NONE>"));
    }
    else
    {
        litbufLen = this->rep.insts.litbufLen;
        for (size_t i = 0; i < litbufLen; ++i)
        {
            const char16 c = (char16)litbuf[i];
            w->PrintEscapedChar(c);
        }
        w->PrintEOL(_u(""));
    }
    // %u consumes an unsigned int; passing the size_t directly would be a varargs type mismatch
    // on targets where size_t is wider than unsigned int, so convert explicitly.
    w->PrintEOL(_u("litbufLen:    %u"), static_cast<unsigned int>(litbufLen));

    w->Print(_u("flags:        "));
    if ((flags & GlobalRegexFlag) != 0) w->Print(_u("global "));
    if ((flags & MultilineRegexFlag) != 0) w->Print(_u("multiline "));
    if ((flags & IgnoreCaseRegexFlag) != 0) w->Print(_u("ignorecase "));
    if ((flags & DotAllRegexFlag) != 0) w->Print(_u("dotAll "));
    if ((flags & UnicodeRegexFlag) != 0) w->Print(_u("unicode "));
    if ((flags & StickyRegexFlag) != 0) w->Print(_u("sticky "));
    w->EOL();
    w->PrintEOL(_u("numGroups:    %d"), numGroups);
    w->PrintEOL(_u("numLoops:     %d"), numLoops);

    // NOTE(review): BOIInstructionsForStickyFlagTag and BOILiteral2Tag have no case below, so
    // nothing further is printed for such programs - confirm whether that is intentional.
    switch (tag)
    {
    case ProgramTag::BOIInstructionsTag:
    case ProgramTag::InstructionsTag:
        {
            w->PrintEOL(_u("instructions: {"));
            w->Indent();
            if (tag == ProgramTag::BOIInstructionsTag)
            {
                // The beginning-of-input test is implied by the tag rather than stored as an
                // instruction, so it is listed explicitly.
                w->PrintEOL(_u("       BOITest(hardFail: true)"));
            }
            uint8* instsLim = rep.insts.insts + rep.insts.instsLen;
            uint8* curr = rep.insts.insts;
            int i = 0;
            while (curr != instsLim)
            {
                const Inst *inst = (const Inst*)curr;
                switch (inst->tag)
                {
                    // Each instruction's Print returns the amount by which to advance to the
                    // next instruction.
#define MBase(TagName, ClassName) \
                case Inst::InstTag::TagName: \
                { \
                    const ClassName *actualInst = static_cast<const ClassName *>(inst); \
                    curr += actualInst->Print(w, (Label)(isBaselineMode ? i++ : curr - rep.insts.insts), rep.insts.litbuf); \
                    break; \
                }
#define M(TagName) MBase(TagName, TagName##Inst)
#define MTemplate(TagName, TemplateDeclaration, GenericClassName, SpecializedClassName) MBase(TagName, SpecializedClassName)
#include "RegexOpCodes.h"
#undef MBase
#undef M
#undef MTemplate
                default:
                    Assert(false);
                    __assume(false);
                }
            }
            w->Unindent();
            w->PrintEOL(_u("}"));
        }
        break;
    case ProgramTag::SingleCharTag:
        w->Print(_u("special form: <match single char "));
        w->PrintQuotedChar(rep.singleChar.c);
        w->PrintEOL(_u(">"));
        break;
    case ProgramTag::BoundedWordTag:
        w->PrintEOL(_u("special form: <match bounded word>"));
        break;
    case ProgramTag::LeadingTrailingSpacesTag:
        w->PrintEOL(_u("special form: <match leading/trailing spaces: minBegin=%d minEnd=%d>"),
            rep.leadingTrailingSpaces.beginMinMatch, rep.leadingTrailingSpaces.endMinMatch);
        break;
    case ProgramTag::OctoquadTag:
        w->Print(_u("special form: <octoquad "));
        rep.octoquad.matcher->Print(w);
        w->PrintEOL(_u(">"));
        break;
    }
    w->Unindent();
    w->PrintEOL(_u("}"));
}
#endif
| |
| // Template parameter here is the max number of cases |
| template void UnifiedRegex::SwitchMixin<2>::AddCase(char16, Label); |
| template void UnifiedRegex::SwitchMixin<4>::AddCase(char16, Label); |
| template void UnifiedRegex::SwitchMixin<8>::AddCase(char16, Label); |
| template void UnifiedRegex::SwitchMixin<16>::AddCase(char16, Label); |
| template void UnifiedRegex::SwitchMixin<24>::AddCase(char16, Label); |
| |
| #define M(...) |
| #define MTemplate(TagName, TemplateDeclaration, GenericClassName, SpecializedClassName) template struct SpecializedClassName; |
| #include "RegexOpCodes.h" |
| #undef M |
| #undef MTemplate |
| } |