| //------------------------------------------------------------------------------------------------------- |
| // Copyright (C) Microsoft. All rights reserved. |
| // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. |
| //------------------------------------------------------------------------------------------------------- |
| #include "ParserPch.h" |
| #include "Common/MathUtil.h" |
| |
| namespace UnifiedRegex |
| { |
| // ---------------------------------------------------------------------- |
| // CharBitVec |
| // ---------------------------------------------------------------------- |
| |
| uint CharBitvec::Count() const |
| { |
| uint n = 0; |
| for (int w = 0; w < vecSize; w++) |
| { |
| n += Math::PopCnt32(vec[w]); |
| } |
| return n; |
| } |
| |
| int CharBitvec::NextSet(int k) const |
| { |
| if (k < 0 || k >= Size) |
| return -1; |
| uint w = k / wordSize; |
| uint o = k % wordSize; |
| uint32 v = vec[w] >> o; |
| do |
| { |
| if (v == 0) |
| { |
| k += wordSize - o; |
| break; |
| } |
| else if ((v & 0x1) != 0) |
| return k; |
| else |
| { |
| v >>= 1; |
| o++; |
| k++; |
| } |
| } |
| while (o < wordSize); |
| |
| w++; |
| while (w < vecSize) |
| { |
| o = 0; |
| v = vec[w]; |
| do |
| { |
| if (v == 0) |
| { |
| k += wordSize - o; |
| break; |
| } |
| else if ((v & 0x1) != 0) |
| return k; |
| else |
| { |
| v >>= 1; |
| o++; |
| k++; |
| } |
| |
| } |
| while (o < wordSize); |
| w++; |
| } |
| return -1; |
| } |
| |
| int CharBitvec::NextClear(int k) const |
| { |
| if (k < 0 || k >= Size) |
| return -1; |
| uint w = k / wordSize; |
| uint o = k % wordSize; |
| uint32 v = vec[w] >> o; |
| do |
| { |
| if (v == ones) |
| { |
| k += wordSize - o; |
| break; |
| } |
| else if ((v & 0x1) == 0) |
| return k; |
| else |
| { |
| v >>= 1; |
| o++; |
| k++; |
| } |
| } |
| while (o < wordSize); |
| |
| w++; |
| while (w < vecSize) |
| { |
| o = 0; |
| v = vec[w]; |
| do |
| { |
| if (v == ones) |
| { |
| k += wordSize - o; |
| break; |
| } |
| else if ((v & 0x1) == 0) |
| return k; |
| else |
| { |
| v >>= 1; |
| o++; |
| k++; |
| } |
| |
| } |
| while (o < wordSize); |
| w++; |
| } |
| return -1; |
| } |
| |
| template <typename C> |
| void CharBitvec::ToComplement(ArenaAllocator* allocator, uint base, CharSet<C>& result) const |
| { |
| int hi = -1; |
| while (true) |
| { |
| // Find the next range of clear bits in vector |
| int li = NextClear(hi + 1); |
| if (li < 0) |
| return; |
| hi = NextSet(li + 1); |
| if (hi < 0) |
| hi = Size - 1; |
| else |
| { |
| Assert(hi > 0); |
| hi--; |
| } |
| |
| // Add range as characters |
| result.SetRange(allocator, Chars<C>::ITC(base + li), Chars<C>::ITC(base + hi)); |
| } |
| } |
| |
| template <typename C> |
| void CharBitvec::ToEquivClass(ArenaAllocator* allocator, uint base, uint& tblidx, CharSet<C>& result, codepoint_t baseOffset) const |
| { |
| int hi = -1; |
| while (true) |
| { |
| // Find the next range of set bits in vector |
| int li = NextSet(hi + 1); |
| if (li < 0) |
| return; |
| hi = NextClear(li + 1); |
| if (hi < 0) |
| hi = Size - 1; |
| else |
| { |
| Assert(hi > 0); |
| hi--; |
| } |
| |
| // Convert to character codes |
| uint l = base + li + baseOffset; |
| uint h = base + hi + baseOffset; |
| |
| do |
| { |
| uint acth; |
| C equivl[CaseInsensitive::EquivClassSize]; |
| CaseInsensitive::RangeToEquivClass(tblidx, l, h, acth, equivl); |
| uint n = acth - l; |
| for (int i = 0; i < CaseInsensitive::EquivClassSize; i++) |
| { |
| result.SetRange(allocator, equivl[i], Chars<C>::Shift(equivl[i], n)); |
| } |
| |
| // Go around again for rest of this range |
| l = acth + 1; |
| } |
| while (l <= h); |
| } |
| } |
| |
| // ---------------------------------------------------------------------- |
| // CharSetNode |
| // ---------------------------------------------------------------------- |
| |
| CharSetNode* CharSetNode::For(ArenaAllocator* allocator, int level) |
| { |
| if (level == 0) |
| return Anew(allocator, CharSetLeaf); |
| else |
| return Anew(allocator, CharSetInner); |
| } |
| |
| // ---------------------------------------------------------------------- |
| // CharSetFull |
| // ---------------------------------------------------------------------- |
| |
| CharSetFull CharSetFull::Instance; |
| |
| CharSetFull* const CharSetFull::TheFullNode = &CharSetFull::Instance; |
| |
| CharSetFull::CharSetFull() {} |
| |
| void CharSetFull::FreeSelf(ArenaAllocator* allocator) |
| { |
| Assert(this == TheFullNode); |
| // Never allocated |
| } |
| |
| CharSetNode* CharSetFull::Clone(ArenaAllocator* allocator) const |
| { |
| // Always shared |
| return (CharSetNode*)this; |
| } |
| |
| CharSetNode* CharSetFull::Set(ArenaAllocator* allocator, uint level, uint l, uint h) |
| { |
| return this; |
| } |
| |
| CharSetNode* CharSetFull::ClearRange(ArenaAllocator* allocator, uint level, uint l, uint h) |
| { |
| AssertMsg(h <= lim(level), "The range for clearing provided is invalid for this level."); |
| AssertMsg(l <= h, "Can't clear where lower is bigger than the higher."); |
| if (l == 0 && h == lim(level)) |
| { |
| return nullptr; |
| } |
| |
| CharSetNode* toReturn = For(allocator, level); |
| |
| if (l > 0) |
| { |
| AssertVerify(toReturn->Set(allocator, level, 0, l - 1) == toReturn); |
| } |
| |
| if (h < lim(level)) |
| { |
| AssertVerify(toReturn->Set(allocator, level, h + 1, lim(level)) == toReturn); |
| } |
| |
| return toReturn; |
| } |
| |
| CharSetNode* CharSetFull::UnionInPlace(ArenaAllocator* allocator, uint level, const CharSetNode* other) |
| { |
| return this; |
| } |
| |
| bool CharSetFull::Get(uint level, uint k) const |
| { |
| return true; |
| } |
| |
| void CharSetFull::ToComplement(ArenaAllocator* allocator, uint level, uint base, CharSet<Char>& result) const |
| { |
| // Empty, so add nothing |
| } |
| |
| void CharSetFull::ToEquivClassW(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<char16>& result) const |
| { |
| this->ToEquivClass<char16>(allocator, level, base, tblidx, result); |
| } |
| |
| void CharSetFull::ToEquivClassCP(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<codepoint_t>& result, codepoint_t baseOffset) const |
| { |
| this->ToEquivClass<codepoint_t>(allocator, level, base, tblidx, result, baseOffset); |
| } |
| |
| template <typename C> |
| void CharSetFull::ToEquivClass(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<C>& result, codepoint_t baseOffset) const |
| { |
| uint l = base + (CharSetNode::levels - 1 == level ? 0xff : 0) + baseOffset; |
| uint h = base + lim(level) + baseOffset; |
| |
| do |
| { |
| uint acth; |
| C equivl[CaseInsensitive::EquivClassSize]; |
| CaseInsensitive::RangeToEquivClass(tblidx, l, h, acth, equivl); |
| uint n = acth - l; |
| for (int i = 0; i < CaseInsensitive::EquivClassSize; i++) |
| { |
| result.SetRange(allocator, equivl[i], Chars<C>::Shift(equivl[i], n)); |
| } |
| |
| // Go around again for rest of this range |
| l = acth + 1; |
| } |
| while (l <= h); |
| } |
| |
| bool CharSetFull::IsSubsetOf(uint level, const CharSetNode* other) const |
| { |
| Assert(other != nullptr); |
| return other == TheFullNode; |
| } |
| |
| bool CharSetFull::IsEqualTo(uint level, const CharSetNode* other) const |
| { |
| Assert(other != nullptr); |
| return other == TheFullNode; |
| } |
| |
| uint CharSetFull::Count(uint level) const |
| { |
| return lim(level) + 1; |
| } |
| |
| _Success_(return) |
| bool CharSetFull::GetNextRange(uint level, Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) const |
| { |
| Assert(searchCharStart < this->Count(level)); |
| |
| *outLowerChar = searchCharStart; |
| *outHigherChar = (Char)this->Count(level) - 1; |
| |
| return true; |
| } |
| |
| #if DBG |
| bool CharSetFull::IsLeaf() const |
| { |
| return false; |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // CharSetInner |
| // ---------------------------------------------------------------------- |
| |
| CharSetInner::CharSetInner() |
| { |
| for (uint i = 0; i < branchingPerInnerLevel; i++) |
| children[i] = 0; |
| } |
| |
| void CharSetInner::FreeSelf(ArenaAllocator* allocator) |
| { |
| for (uint i = 0; i < branchingPerInnerLevel; i++) |
| { |
| if (children[i] != 0) |
| { |
| children[i]->FreeSelf(allocator); |
| #if DBG |
| children[i] = 0; |
| #endif |
| } |
| } |
| Adelete(allocator, this); |
| } |
| |
| CharSetNode* CharSetInner::Clone(ArenaAllocator* allocator) const |
| { |
| CharSetInner* res = Anew(allocator, CharSetInner); |
| for (uint i = 0; i < branchingPerInnerLevel; i++) |
| { |
| if (children[i] != 0) |
| res->children[i] = children[i]->Clone(allocator); |
| } |
| return res; |
| } |
| |
| CharSetNode* CharSetInner::ClearRange(ArenaAllocator* allocator, uint level, uint l, uint h) |
| { |
| Assert(level > 0); |
| AssertMsg(h <= lim(level), "The range for clearing provided is invalid for this level."); |
| AssertMsg(l <= h, "Can't clear where lower is bigger than the higher."); |
| if (l == 0 && h == lim(level)) |
| { |
| return nullptr; |
| } |
| |
| uint lowerIndex = innerIdx(level, l); |
| uint higherIndex = innerIdx(level--, h); |
| l = l & lim(level); |
| h = h & lim(level); |
| if (lowerIndex == higherIndex) |
| { |
| if (children[lowerIndex] != nullptr) |
| { |
| children[lowerIndex] = children[lowerIndex]->ClearRange(allocator, level, l, h); |
| } |
| } |
| else |
| { |
| if (children[lowerIndex] != nullptr) |
| { |
| children[lowerIndex] = children[lowerIndex]->ClearRange(allocator, level, l, lim(level)); |
| } |
| |
| for (uint i = lowerIndex + 1; i < higherIndex; i++) |
| { |
| if (children[i] != nullptr) |
| { |
| children[i]->FreeSelf(allocator); |
| } |
| |
| children[i] = nullptr; |
| } |
| |
| if (children[higherIndex] != nullptr) |
| { |
| children[higherIndex] = children[higherIndex]->ClearRange(allocator, level, 0, h); |
| } |
| } |
| for (int i = 0; i < branchingPerInnerLevel; i++) |
| { |
| if (children[i] != nullptr) |
| { |
| return this; |
| } |
| } |
| |
| return nullptr; |
| } |
| |
| CharSetNode* CharSetInner::Set(ArenaAllocator* allocator, uint level, uint l, uint h) |
| { |
| Assert(level > 0); |
| uint li = innerIdx(level, l); |
| uint hi = innerIdx(level--, h); |
| bool couldBeFull = true; |
| if (li == hi) |
| { |
| if (children[li] == nullptr) |
| { |
| if (remain(level, l) == 0 && remain(level, h + 1) == 0) |
| children[li] = CharSetFull::TheFullNode; |
| else |
| { |
| children[li] = For(allocator, level); |
| children[li] = children[li]->Set(allocator, level, l, h); |
| couldBeFull = false; |
| } |
| } |
| else |
| children[li] = children[li]->Set(allocator, level, l, h); |
| } |
| else |
| { |
| if (children[li] == nullptr) |
| { |
| if (remain(level, l) == 0) |
| children[li] = CharSetFull::TheFullNode; |
| else |
| { |
| children[li] = For(allocator, level); |
| children[li] = children[li]->Set(allocator, level, l, lim(level)); |
| couldBeFull = false; |
| } |
| } |
| else |
| children[li] = children[li]->Set(allocator, level, l, lim(level)); |
| for (uint i = li + 1; i < hi; i++) |
| { |
| if (children[i] != nullptr) |
| children[i]->FreeSelf(allocator); |
| children[i] = CharSetFull::TheFullNode; |
| } |
| if (children[hi] == nullptr) |
| { |
| if (remain(level, h + 1) == 0) |
| children[hi] = CharSetFull::TheFullNode; |
| else |
| { |
| children[hi] = For(allocator, level); |
| children[hi] = children[hi]->Set(allocator, level, 0, h); |
| couldBeFull = false; |
| } |
| } |
| else |
| children[hi] = children[hi]->Set(allocator, level, 0, h); |
| } |
| if (couldBeFull) |
| { |
| for (uint i = 0; i < branchingPerInnerLevel; i++) |
| { |
| if (children[i] != CharSetFull::TheFullNode) |
| return this; |
| } |
| FreeSelf(allocator); |
| return CharSetFull::TheFullNode; |
| } |
| else |
| return this; |
| } |
| |
| CharSetNode* CharSetInner::UnionInPlace(ArenaAllocator* allocator, uint level, const CharSetNode* other) |
| { |
| Assert(level > 0); |
| Assert(other != nullptr && other != CharSetFull::TheFullNode && !other->IsLeaf()); |
| CharSetInner* otherInner = (CharSetInner*)other; |
| level--; |
| bool isFull = true; |
| for (uint i = 0; i < branchingPerInnerLevel; i++) |
| { |
| if (otherInner->children[i] != nullptr) |
| { |
| if (otherInner->children[i] == CharSetFull::TheFullNode) |
| { |
| if (children[i] != nullptr) |
| children[i]->FreeSelf(allocator); |
| children[i] = CharSetFull::TheFullNode; |
| } |
| else |
| { |
| if (children[i] == nullptr) |
| children[i] = For(allocator, level); |
| children[i] = children[i]->UnionInPlace(allocator, level, otherInner->children[i]); |
| if (children[i] != CharSetFull::TheFullNode) |
| isFull = false; |
| } |
| } |
| else if (children[i] != CharSetFull::TheFullNode) |
| isFull = false; |
| } |
| if (isFull) |
| { |
| FreeSelf(allocator); |
| return CharSetFull::TheFullNode; |
| } |
| else |
| return this; |
| } |
| |
| bool CharSetInner::Get(uint level, uint k) const |
| { |
| Assert(level > 0); |
| uint i = innerIdx(level--, k); |
| if (children[i] == nullptr) |
| return false; |
| else |
| return children[i]->Get(level, k); |
| } |
| |
| void CharSetInner::ToComplement(ArenaAllocator* allocator, uint level, uint base, CharSet<Char>& result) const |
| { |
| Assert(level > 0); |
| level--; |
| uint delta = lim(level) + 1; |
| for (uint i = 0; i < branchingPerInnerLevel; i++) |
| { |
| if (children[i] == nullptr) |
| // Caution: Part of the range for this child may overlap with direct vector |
| result.SetRange(allocator, UTC(max(base, directSize)), UTC(base + delta - 1)); |
| else |
| children[i]->ToComplement(allocator, level, base, result); |
| base += delta; |
| } |
| } |
| |
| void CharSetInner::ToEquivClassW(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<char16>& result) const |
| { |
| Assert(level > 0); |
| level--; |
| uint delta = lim(level) + 1; |
| for (uint i = 0; i < branchingPerInnerLevel; i++) |
| { |
| if (children[i] != nullptr) |
| { |
| children[i]->ToEquivClassW(allocator, level, base, tblidx, result); |
| } |
| base += delta; |
| } |
| } |
| |
| void CharSetInner::ToEquivClassCP(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<codepoint_t>& result, codepoint_t baseOffset) const |
| { |
| Assert(level > 0); |
| level--; |
| uint delta = lim(level) + 1; |
| for (uint i = 0; i < branchingPerInnerLevel; i++) |
| { |
| if (children[i] != nullptr) |
| { |
| children[i]->ToEquivClassCP(allocator, level, base, tblidx, result, baseOffset); |
| } |
| base += delta; |
| } |
| } |
| |
| bool CharSetInner::IsSubsetOf(uint level, const CharSetNode* other) const |
| { |
| Assert(level > 0); |
| Assert(other != nullptr && !other->IsLeaf()); |
| if (other == CharSetFull::TheFullNode) |
| return true; |
| level--; |
| const CharSetInner* otherInner = (CharSetInner*)other; |
| for (uint i = 0; i < branchingPerInnerLevel; i++) |
| { |
| if (children[i] != nullptr) |
| { |
| if (otherInner->children[i] == nullptr) |
| return false; |
| if (children[i]->IsSubsetOf(level, otherInner->children[i])) |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| bool CharSetInner::IsEqualTo(uint level, const CharSetNode* other) const |
| { |
| Assert(level > 0); |
| Assert(other != nullptr && !other->IsLeaf()); |
| if (other == CharSetFull::TheFullNode) |
| return false; |
| level--; |
| const CharSetInner* otherInner = (CharSetInner*)other; |
| for (uint i = 0; i < branchingPerInnerLevel; i++) |
| { |
| if (children[i] != 0) |
| { |
| if (otherInner->children[i] == nullptr) |
| return false; |
| if (children[i]->IsSubsetOf(level, otherInner->children[i])) |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| uint CharSetInner::Count(uint level) const |
| { |
| uint n = 0; |
| Assert(level > 0); |
| level--; |
| for (uint i = 0; i < branchingPerInnerLevel; i++) |
| { |
| if (children[i] != nullptr) |
| n += children[i]->Count(level); |
| } |
| return n; |
| } |
| |
| _Success_(return) |
| bool CharSetInner::GetNextRange(uint level, Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) const |
| { |
| Assert(searchCharStart < this->lim(level) + 1); |
| uint innerIndex = innerIdx(level--, searchCharStart); |
| |
| Char currentLowChar = 0, currentHighChar = 0; |
| |
| for (; innerIndex < branchingPerInnerLevel; innerIndex++) |
| { |
| if (children[innerIndex] != nullptr && children[innerIndex]->GetNextRange(level, (Char)remain(level, searchCharStart), ¤tLowChar, ¤tHighChar)) |
| { |
| break; |
| } |
| |
| if (innerIndex < branchingPerInnerLevel - 1) |
| { |
| searchCharStart = (Char)indexToValue(level + 1, innerIndex + 1, 0); |
| } |
| } |
| |
| if (innerIndex == branchingPerInnerLevel) |
| { |
| return false; |
| } |
| |
| currentLowChar = (Char)indexToValue(level + 1, innerIndex, currentLowChar); |
| currentHighChar = (Char)indexToValue(level + 1, innerIndex, currentHighChar); |
| |
| innerIndex += 1; |
| |
| for (; remain(level, currentHighChar) == lim(level) && innerIndex < branchingPerInnerLevel; innerIndex++) |
| { |
| Char tempLower, tempHigher; |
| if (children[innerIndex] == nullptr || !children[innerIndex]->GetNextRange(level, 0x0, &tempLower, &tempHigher) || remain(level, tempLower) != 0) |
| { |
| break; |
| } |
| |
| currentHighChar = (Char)indexToValue(level + 1, innerIndex, tempHigher); |
| } |
| |
| *outLowerChar = currentLowChar; |
| *outHigherChar = currentHighChar; |
| |
| return true; |
| } |
| |
| #if DBG |
| bool CharSetInner::IsLeaf() const |
| { |
| return false; |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // CharSetLeaf |
| // ---------------------------------------------------------------------- |
| |
| CharSetLeaf::CharSetLeaf() |
| { |
| vec.Clear(); |
| } |
| |
| void CharSetLeaf::FreeSelf(ArenaAllocator* allocator) |
| { |
| Adelete(allocator, this); |
| } |
| |
| CharSetNode* CharSetLeaf::Clone(ArenaAllocator* allocator) const |
| { |
| return Anew(allocator, CharSetLeaf, *this); |
| } |
| |
| CharSetNode* CharSetLeaf::Set(ArenaAllocator* allocator, uint level, uint l, uint h) |
| { |
| Assert(level == 0); |
| vec.SetRange(leafIdx(l), leafIdx(h)); |
| if (vec.IsFull()) |
| { |
| FreeSelf(allocator); |
| return CharSetFull::TheFullNode; |
| } |
| else |
| return this; |
| } |
| |
| CharSetNode* CharSetLeaf::ClearRange(ArenaAllocator* allocator, uint level, uint l, uint h) |
| { |
| Assert(level == 0); |
| AssertMsg(h <= lim(level), "The range for clearing provided is invalid for this level."); |
| AssertMsg(l <= h, "Can't clear where lower is bigger than the higher."); |
| if (l == 0 && h == lim(level)) |
| { |
| return nullptr; |
| } |
| |
| vec.ClearRange(leafIdx(l), leafIdx(h)); |
| |
| if (vec.IsEmpty()) |
| { |
| FreeSelf(allocator); |
| return nullptr; |
| } |
| |
| return this; |
| } |
| |
| CharSetNode* CharSetLeaf::UnionInPlace(ArenaAllocator* allocator, uint level, const CharSetNode* other) |
| { |
| Assert(level == 0); |
| Assert(other != nullptr && other->IsLeaf()); |
| CharSetLeaf* otherLeaf = (CharSetLeaf*)other; |
| if (vec.UnionInPlaceFullCheck(otherLeaf->vec)) |
| { |
| FreeSelf(allocator); |
| return CharSetFull::TheFullNode; |
| } |
| else |
| return this; |
| } |
| |
| bool CharSetLeaf::Get(uint level, uint k) const |
| { |
| Assert(level == 0); |
| return vec.Get(leafIdx(k)); |
| } |
| |
| void CharSetLeaf::ToComplement(ArenaAllocator* allocator, uint level, uint base, CharSet<Char>& result) const |
| { |
| Assert(level == 0); |
| vec.ToComplement<char16>(allocator, base, result); |
| } |
| |
| void CharSetLeaf::ToEquivClassW(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<char16>& result) const |
| { |
| this->ToEquivClass<char16>(allocator, level, base, tblidx, result); |
| } |
| |
| void CharSetLeaf::ToEquivClassCP(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<codepoint_t>& result, codepoint_t baseOffset) const |
| { |
| this->ToEquivClass<codepoint_t>(allocator, level, base, tblidx, result, baseOffset); |
| } |
| |
| template <typename C> |
| void CharSetLeaf::ToEquivClass(ArenaAllocator* allocator, uint level, uint base, uint& tblidx, CharSet<C>& result, codepoint_t baseOffset) const |
| { |
| Assert(level == 0); |
| vec.ToEquivClass<C>(allocator, base, tblidx, result, baseOffset); |
| } |
| |
| bool CharSetLeaf::IsSubsetOf(uint level, const CharSetNode* other) const |
| { |
| Assert(level == 0); |
| Assert(other != nullptr); |
| if (other == CharSetFull::TheFullNode) |
| return true; |
| Assert(other->IsLeaf()); |
| CharSetLeaf* otherLeaf = (CharSetLeaf*)other; |
| return vec.IsSubsetOf(otherLeaf->vec); |
| } |
| |
| bool CharSetLeaf::IsEqualTo(uint level, const CharSetNode* other) const |
| { |
| Assert(level == 0); |
| Assert(other != nullptr); |
| if (other == CharSetFull::TheFullNode) |
| return false; |
| Assert(other->IsLeaf()); |
| CharSetLeaf* otherLeaf = (CharSetLeaf*)other; |
| return vec.IsSubsetOf(otherLeaf->vec); |
| } |
| |
| uint CharSetLeaf::Count(uint level) const |
| { |
| Assert(level == 0); |
| return vec.Count(); |
| } |
| |
| _Success_(return) |
| bool CharSetLeaf::GetNextRange(uint level, Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) const |
| { |
| Assert(searchCharStart < lim(level) + 1); |
| int nextSet = vec.NextSet(searchCharStart); |
| |
| if (nextSet == -1) |
| { |
| return false; |
| } |
| |
| *outLowerChar = (char16)nextSet; |
| |
| int nextClear = vec.NextClear(nextSet); |
| |
| *outHigherChar = UTC(nextClear == -1 ? lim(level) : nextClear - 1); |
| |
| return true; |
| } |
| |
| #if DBG |
| bool CharSetLeaf::IsLeaf() const |
| { |
| return true; |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // CharSet<char16> |
| // ---------------------------------------------------------------------- |
| |
| void CharSet<char16>::SwitchRepresentations(ArenaAllocator* allocator) |
| { |
| Assert(IsCompact()); |
| uint existCount = this->GetCompactLength(); |
| __assume(existCount <= MaxCompact); |
| if (existCount <= MaxCompact) |
| { |
| Char existCs[MaxCompact]; |
| for (uint i = 0; i < existCount; i++) |
| { |
| existCs[i] = GetCompactChar(i); |
| } |
| rep.full.root = nullptr; |
| rep.full.direct.Clear(); |
| for (uint i = 0; i < existCount; i++) |
| Set(allocator, existCs[i]); |
| } |
| } |
| |
| void CharSet<char16>::Sort() |
| { |
| Assert(IsCompact()); |
| __assume(this->GetCompactLength() <= MaxCompact); |
| for (uint i = 1; i < this->GetCompactLength(); i++) |
| { |
| uint curr = GetCompactCharU(i); |
| for (uint j = 0; j < i; j++) |
| { |
| if (GetCompactCharU(j) > curr) |
| { |
| for (int k = i; k > (int)j; k--) |
| { |
| this->ReplaceCompactCharU(k, this->GetCompactCharU(k - 1)); |
| } |
| this->ReplaceCompactCharU(j, curr); |
| break; |
| } |
| } |
| } |
| } |
| |
| CharSet<char16>::CharSet() |
| { |
| Assert(sizeof(Node*) == sizeof(size_t)); |
| Assert(sizeof(CompactRep) == sizeof(FullRep)); |
| rep.compact.countPlusOne = 1; |
| for (int i = 0; i < MaxCompact; i++) |
| rep.compact.cs[i] = emptySlot; |
| } |
| |
| void CharSet<char16>::FreeBody(ArenaAllocator* allocator) |
| { |
| if (!IsCompact() && rep.full.root != nullptr) |
| { |
| rep.full.root->FreeSelf(allocator); |
| #if DBG |
| rep.full.root = nullptr; |
| #endif |
| } |
| } |
| |
| void CharSet<char16>::Clear(ArenaAllocator* allocator) |
| { |
| if (!IsCompact() && rep.full.root != nullptr) |
| rep.full.root->FreeSelf(allocator); |
| rep.compact.countPlusOne = 1; |
| for (int i = 0; i < MaxCompact; i++) |
| rep.compact.cs[i] = emptySlot; |
| } |
| |
| void CharSet<char16>::CloneFrom(ArenaAllocator* allocator, const CharSet<Char>& other) |
| { |
| Clear(allocator); |
| Assert(IsCompact()); |
| if (other.IsCompact()) |
| { |
| this->SetCompactLength(other.GetCompactLength()); |
| for (uint i = 0; i < other.GetCompactLength(); i++) |
| { |
| this->ReplaceCompactCharU(i, other.GetCompactCharU(i)); |
| } |
| } |
| else |
| { |
| rep.full.root = other.rep.full.root == nullptr ? nullptr : other.rep.full.root->Clone(allocator); |
| rep.full.direct.CloneFrom(other.rep.full.direct); |
| } |
| } |
| |
| void CharSet<char16>::CloneNonSurrogateCodeUnitsTo(ArenaAllocator* allocator, CharSet<Char>& other) |
| { |
| if (this->IsCompact()) |
| { |
| for (uint i = 0; i < this->GetCompactLength(); i++) |
| { |
| Char c = this->GetCompactChar(i); |
| uint uChar = CTU(c); |
| if (uChar < 0xD800 || uChar > 0xDFFF) |
| { |
| other.Set(allocator, c); |
| } |
| } |
| } |
| else |
| { |
| other.rep.full.direct.CloneFrom(rep.full.direct); |
| if (rep.full.root == nullptr) |
| { |
| other.rep.full.root = nullptr; |
| } |
| else |
| { |
| other.rep.full.root = rep.full.root->Clone(allocator); |
| other.rep.full.root->ClearRange(allocator, CharSetNode::levels - 1, 0xD800, 0XDFFF); |
| } |
| } |
| } |
| |
| void CharSet<char16>::CloneSurrogateCodeUnitsTo(ArenaAllocator* allocator, CharSet<Char>& other) |
| { |
| if (this->IsCompact()) |
| { |
| for (uint i = 0; i < this->GetCompactLength(); i++) |
| { |
| Char c = this->GetCompactChar(i); |
| uint uChar = CTU(c); |
| if (0xD800 <= uChar && uChar <= 0xDFFF) |
| { |
| other.Set(allocator, c); |
| } |
| } |
| } |
| else |
| { |
| other.rep.full.direct.CloneFrom(rep.full.direct); |
| if (rep.full.root == nullptr) |
| { |
| other.rep.full.root = nullptr; |
| } |
| else |
| { |
| other.rep.full.root = rep.full.root->Clone(allocator); |
| other.rep.full.root->ClearRange(allocator, CharSetNode::levels - 1, 0, 0xD7FF); |
| } |
| } |
| } |
| |
| |
| void CharSet<char16>::SubtractRange(ArenaAllocator* allocator, Char lowerChar, Char higherChar) |
| { |
| uint lowerValue = CTU(lowerChar); |
| uint higherValue = CTU(higherChar); |
| |
| if (higherValue < lowerValue) |
| return; |
| |
| if (IsCompact()) |
| { |
| for (uint i = 0; i < this->GetCompactLength(); ) |
| { |
| uint value = this->GetCompactCharU(i); |
| |
| if (value >= lowerValue && value <= higherValue) |
| { |
| this->RemoveCompactChar(i); |
| } |
| else |
| { |
| i++; |
| } |
| } |
| } |
| else if(lowerValue == 0 && higherValue == MaxUChar) |
| { |
| this->Clear(allocator); |
| } |
| else |
| { |
| if (lowerValue < CharSetNode::directSize) |
| { |
| uint maxDirectValue = min(higherValue, CharSetNode::directSize - 1); |
| rep.full.direct.ClearRange(lowerValue, maxDirectValue); |
| } |
| |
| if (rep.full.root != nullptr) |
| { |
| rep.full.root = rep.full.root->ClearRange(allocator, CharSetNode::levels - 1, lowerValue, higherValue); |
| } |
| } |
| } |
| |
| void CharSet<char16>::SetRange(ArenaAllocator* allocator, Char lc, Char hc) |
| { |
| uint l = CTU(lc); |
| uint h = CTU(hc); |
| if (h < l) |
| return; |
| |
| if (IsCompact()) |
| { |
| if (h - l < MaxCompact) |
| { |
| do |
| { |
| uint i; |
| for (i = 0; i < this->GetCompactLength(); i++) |
| { |
| __assume(l <= MaxUChar); |
| if (l <= MaxUChar && i < MaxCompact) |
| { |
| if (this->GetCompactCharU(i) == l) |
| break; |
| } |
| } |
| if (i == this->GetCompactLength()) |
| { |
| // Character not already in compact set |
| if (i < MaxCompact) |
| { |
| this->AddCompactCharU(l); |
| } |
| else |
| // Must switch representations |
| break; |
| } |
| l++; |
| } |
| while (l <= h); |
| if (h < l) |
| // All chars are now in compact set |
| return; |
| // else: fall-through to general case for remaining chars |
| } |
| // else: no use even trying |
| |
| SwitchRepresentations(allocator); |
| } |
| |
| Assert(!IsCompact()); |
| |
| if (l == 0 && h == MaxUChar) |
| { |
| rep.full.direct.SetRange(0, CharSetNode::directSize - 1); |
| if (rep.full.root != nullptr) |
| rep.full.root->FreeSelf(allocator); |
| rep.full.root = CharSetFull::TheFullNode; |
| } |
| else |
| { |
| if (l < CharSetNode::directSize) |
| { |
| if (h < CharSetNode::directSize) |
| { |
| rep.full.direct.SetRange(l, h); |
| return; |
| } |
| rep.full.direct.SetRange(l, CharSetNode::directSize - 1); |
| l = CharSetNode::directSize; |
| } |
| |
| if (rep.full.root == nullptr) |
| rep.full.root = Anew(allocator, CharSetInner); |
| rep.full.root = rep.full.root->Set(allocator, CharSetNode::levels - 1, l, h); |
| } |
| } |
| |
| void CharSet<char16>::SetRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs) |
| { |
| for (int i = 0; i < numSortedPairs * 2; i += 2) |
| { |
| Assert(i == 0 || sortedPairs[i-1] < sortedPairs[i]); |
| Assert(sortedPairs[i] <= sortedPairs[i+1]); |
| SetRange(allocator, sortedPairs[i], sortedPairs[i+1]); |
| } |
| } |
| |
| void CharSet<char16>::SetNotRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs) |
| { |
| if (numSortedPairs == 0) |
| SetRange(allocator, MinChar, MaxChar); |
| else |
| { |
| if (sortedPairs[0] != MinChar) |
| SetRange(allocator, MinChar, sortedPairs[0] - 1); |
| for (int i = 1; i < numSortedPairs * 2 - 1; i += 2) |
| SetRange(allocator, sortedPairs[i] + 1, sortedPairs[i+1] - 1); |
| if (sortedPairs[numSortedPairs * 2 - 1] != MaxChar) |
| SetRange(allocator, sortedPairs[numSortedPairs * 2 - 1] + 1, MaxChar); |
| } |
| } |
| |
| void CharSet<char16>::UnionInPlace(ArenaAllocator* allocator, const CharSet<Char>& other) |
| { |
| if (other.IsCompact()) |
| { |
| for (uint i = 0; i < other.GetCompactLength(); i++) |
| { |
| Set(allocator, other.GetCompactChar(i)); |
| } |
| return; |
| } |
| |
| if (IsCompact()) |
| SwitchRepresentations(allocator); |
| |
| Assert(!IsCompact() && !other.IsCompact()); |
| |
| rep.full.direct.UnionInPlace(other.rep.full.direct); |
| |
| if (other.rep.full.root != nullptr) |
| { |
| if (other.rep.full.root == CharSetFull::TheFullNode) |
| { |
| if (rep.full.root != nullptr) |
| rep.full.root->FreeSelf(allocator); |
| rep.full.root = CharSetFull::TheFullNode; |
| } |
| else |
| { |
| if (rep.full.root == nullptr) |
| rep.full.root = Anew(allocator, CharSetInner); |
| rep.full.root = rep.full.root->UnionInPlace(allocator, CharSetNode::levels - 1, other.rep.full.root); |
| } |
| } |
| } |
| _Success_(return) |
| bool CharSet<char16>::GetNextRange(Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) |
| { |
| int count = this->Count(); |
| if (count == 0) |
| { |
| return false; |
| } |
| else if (count == 1) |
| { |
| Char singleton = this->Singleton(); |
| if (singleton < searchCharStart) |
| { |
| return false; |
| } |
| |
| *outLowerChar = *outHigherChar = singleton; |
| |
| return true; |
| } |
| |
| if (IsCompact()) |
| { |
| this->Sort(); |
| uint i = 0; |
| size_t compactLength = this->GetCompactLength(); |
| for (; i < compactLength; i++) |
| { |
| Char nextChar = this->GetCompactChar(i); |
| if (nextChar >= searchCharStart) |
| { |
| *outLowerChar = *outHigherChar = nextChar; |
| break; |
| } |
| } |
| |
| if (i == compactLength) |
| { |
| return false; |
| } |
| |
| i++; |
| |
| for (; i < compactLength; i++) |
| { |
| Char nextChar = this->GetCompactChar(i); |
| if (nextChar != *outHigherChar + 1) |
| { |
| return true; |
| } |
| *outHigherChar += 1; |
| } |
| |
| return true; |
| } |
| else |
| { |
| bool found = false; |
| if (CTU(searchCharStart) < CharSetNode::directSize) |
| { |
| int nextSet = rep.full.direct.NextSet(searchCharStart); |
| |
| if (nextSet != -1) |
| { |
| found = true; |
| |
| *outLowerChar = (char16)nextSet; |
| |
| int nextClear = rep.full.direct.NextClear(nextSet); |
| |
| if (nextClear != -1) |
| { |
| *outHigherChar = UTC(nextClear - 1); |
| return true; |
| } |
| |
| *outHigherChar = CharSetNode::directSize - 1; |
| } |
| } |
| |
| if (rep.full.root == nullptr) |
| { |
| return found; |
| } |
| Char tempLowChar = 0, tempHighChar = 0; |
| |
| if (found) |
| { |
| searchCharStart = *outHigherChar + 1; |
| } |
| else |
| { |
| searchCharStart = searchCharStart > CharSetNode::directSize ? searchCharStart : CharSetNode::directSize; |
| } |
| |
| if (rep.full.root->GetNextRange(CharSetNode::levels - 1, searchCharStart, &tempLowChar, &tempHighChar) && (!found || tempLowChar == *outHigherChar + 1)) |
| { |
| if (!found) |
| { |
| *outLowerChar = tempLowChar; |
| } |
| *outHigherChar = tempHighChar; |
| return true; |
| } |
| |
| return found; |
| } |
| } |
| |
| bool CharSet<char16>::Get_helper(uint k) const |
| { |
| Assert(!IsCompact()); |
| CharSetNode* curr = rep.full.root; |
| for (int level = CharSetNode::levels - 1; level > 0; level--) |
| { |
| if (curr == CharSetFull::TheFullNode) |
| return true; |
| CharSetInner* inner = (CharSetInner*)curr; |
| uint i = CharSetNode::innerIdx(level, k); |
| if (inner->children[i] == 0) |
| return false; |
| else |
| curr = inner->children[i]; |
| } |
| if (curr == CharSetFull::TheFullNode) |
| return true; |
| CharSetLeaf* leaf = (CharSetLeaf*)curr; |
| return leaf->vec.Get(CharSetNode::leafIdx(k)); |
| } |
| |
| void CharSet<char16>::ToComplement(ArenaAllocator* allocator, CharSet<Char>& result) |
| { |
| if (IsCompact()) |
| { |
| Sort(); |
| if (this->GetCompactLength() > 0) |
| { |
| if (this->GetCompactCharU(0) > 0) |
| result.SetRange(allocator, UTC(0), UTC(this->GetCompactCharU(0) - 1)); |
| for (uint i = 0; i < this->GetCompactLength() - 1; i++) |
| { |
| result.SetRange(allocator, UTC(this->GetCompactCharU(i) + 1), UTC(this->GetCompactCharU(i + 1) - 1)); |
| } |
| if (this->GetCompactCharU(this->GetCompactLength() - 1) < MaxUChar) |
| { |
| result.SetRange(allocator, UTC(this->GetCompactCharU(this->GetCompactLength() - 1) + 1), UTC(MaxUChar)); |
| } |
| } |
| else if (this->GetCompactLength() == 0) |
| { |
| result.SetRange(allocator, UTC(0), UTC(MaxUChar)); |
| } |
| } |
| else |
| { |
| rep.full.direct.ToComplement<char16>(allocator, 0, result); |
| if (rep.full.root == nullptr) |
| result.SetRange(allocator, UTC(CharSetNode::directSize), MaxChar); |
| else |
| rep.full.root->ToComplement(allocator, CharSetNode::levels - 1, 0, result); |
| } |
| } |
| |
| void CharSet<char16>::ToEquivClass(ArenaAllocator* allocator, CharSet<Char>& result) |
| { |
| uint tblidx = 0; |
| if (IsCompact()) |
| { |
| Sort(); |
| for (uint i = 0; i < this->GetCompactLength(); i++) |
| { |
| uint acth; |
| Char equivs[CaseInsensitive::EquivClassSize]; |
| if (CaseInsensitive::RangeToEquivClass(tblidx, this->GetCompactCharU(i), this->GetCompactCharU(i), acth, equivs)) |
| { |
| for (int j = 0; j < CaseInsensitive::EquivClassSize; j++) |
| { |
| result.Set(allocator, equivs[j]); |
| } |
| } |
| else |
| { |
| result.Set(allocator, this->GetCompactChar(i)); |
| } |
| } |
| } |
| else |
| { |
| rep.full.direct.ToEquivClass<char16>(allocator, 0, tblidx, result); |
| if (rep.full.root != nullptr) |
| { |
| rep.full.root->ToEquivClassW(allocator, CharSetNode::levels - 1, 0, tblidx, result); |
| } |
| } |
| } |
| |
| void CharSet<char16>::ToEquivClassCP(ArenaAllocator* allocator, CharSet<codepoint_t>& result, codepoint_t baseOffset) |
| { |
| uint tblidx = 0; |
| if (IsCompact()) |
| { |
| Sort(); |
| for (uint i = 0; i < this->GetCompactLength(); i++) |
| { |
| uint acth; |
| codepoint_t equivs[CaseInsensitive::EquivClassSize]; |
| if (CaseInsensitive::RangeToEquivClass(tblidx, this->GetCompactCharU(i) + baseOffset, this->GetCompactCharU(i) + baseOffset, acth, equivs)) |
| { |
| for (int j = 0; j < CaseInsensitive::EquivClassSize; j++) |
| { |
| result.Set(allocator, equivs[j]); |
| } |
| } |
| else |
| { |
| result.Set(allocator, this->GetCompactChar(i) + baseOffset); |
| } |
| } |
| } |
| else |
| { |
| rep.full.direct.ToEquivClass<codepoint_t>(allocator, 0, tblidx, result, baseOffset); |
| if (rep.full.root != nullptr) |
| { |
| rep.full.root->ToEquivClassCP(allocator, CharSetNode::levels - 1, 0, tblidx, result, baseOffset); |
| } |
| } |
| } |
| |
| int CharSet<char16>::GetCompactEntries(uint max, __out_ecount(max) Char* entries) const |
| { |
| Assert(max <= MaxCompact); |
| if (!IsCompact()) |
| return -1; |
| |
| uint count = min(max, (uint)(this->GetCompactLength())); |
| __analysis_assume(count <= max); |
| for (uint i = 0; i < count; i++) |
| { |
| // Bug in oacr. it can't figure out count is less than or equal to max |
| #pragma warning(suppress: 22102) |
| entries[i] = this->GetCompactChar(i); |
| } |
| return static_cast<int>(rep.compact.countPlusOne - 1); |
| } |
| |
| bool CharSet<char16>::IsSubsetOf(const CharSet<Char>& other) const |
| { |
| if (IsCompact()) |
| { |
| for (uint i = 0; i < this->GetCompactLength(); i++) |
| { |
| if (!other.Get(this->GetCompactChar(i))) |
| return false; |
| } |
| return true; |
| } |
| else |
| { |
| if (other.IsCompact()) |
| return false; |
| if (!rep.full.direct.IsSubsetOf(other.rep.full.direct)) |
| return false; |
| if (rep.full.root == nullptr) |
| return true; |
| if (other.rep.full.root == nullptr) |
| return false; |
| return rep.full.root->IsSubsetOf(CharSetNode::levels - 1, other.rep.full.root); |
| } |
| } |
| |
| bool CharSet<char16>::IsEqualTo(const CharSet<Char>& other) const |
| { |
| if (IsCompact()) |
| { |
| if (!other.IsCompact()) |
| return false; |
| if (rep.compact.countPlusOne != other.rep.compact.countPlusOne) |
| return false; |
| for (uint i = 0; i < this->GetCompactLength(); i++) |
| { |
| if (!other.Get(this->GetCompactChar(i))) |
| return false; |
| } |
| return true; |
| } |
| else |
| { |
| if (other.IsCompact()) |
| return false; |
| if (!rep.full.direct.IsEqualTo(other.rep.full.direct)) |
| return false; |
| if ((rep.full.root == nullptr) != (other.rep.full.root == nullptr)) |
| return false; |
| if (rep.full.root == nullptr) |
| return true; |
| return rep.full.root->IsEqualTo(CharSetNode::levels - 1, other.rep.full.root); |
| } |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // CAUTION: This method is very slow. |
| void CharSet<char16>::Print(DebugWriter* w) const |
| { |
| w->Print(_u("[")); |
| int start = -1; |
| for (uint i = 0; i < NumChars; i++) |
| { |
| if (Get(UTC(i))) |
| { |
| if (start < 0) |
| { |
| start = i; |
| w->PrintEscapedChar(UTC(i)); |
| } |
| } |
| else |
| { |
| if (start >= 0) |
| { |
| if (i > (uint)(start + 1)) |
| { |
| if (i > (uint)(start + 2)) |
| w->Print(_u("-")); |
| w->PrintEscapedChar(UTC(i - 1)); |
| } |
| start = -1; |
| } |
| } |
| } |
| if (start >= 0) |
| { |
| if ((uint)start < MaxUChar - 1) |
| w->Print(_u("-")); |
| w->PrintEscapedChar(MaxChar); |
| } |
| w->Print(_u("]")); |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // CharSet<codepoint_t> |
| // ---------------------------------------------------------------------- |
| CharSet<codepoint_t>::CharSet() |
| { |
| #if DBG |
| for (int i = 0; i < NumberOfPlanes; i++) |
| { |
| this->characterPlanes[i].IsEmpty(); |
| } |
| #endif |
| } |
| |
| void CharSet<codepoint_t>::FreeBody(ArenaAllocator* allocator) |
| { |
| for (int i = 0; i < NumberOfPlanes; i++) |
| { |
| this->characterPlanes[i].FreeBody(allocator); |
| } |
| } |
| |
| void CharSet<codepoint_t>::Clear(ArenaAllocator* allocator) |
| { |
| for (int i = 0; i < NumberOfPlanes; i++) |
| { |
| this->characterPlanes[i].Clear(allocator); |
| } |
| } |
| |
| void CharSet<codepoint_t>::CloneFrom(ArenaAllocator* allocator, const CharSet<Char>& other) |
| { |
| for (int i = 0; i < NumberOfPlanes; i++) |
| { |
| this->characterPlanes[i].Clear(allocator); |
| this->characterPlanes[i].CloneFrom(allocator, other.characterPlanes[i]); |
| } |
| } |
| |
| void CharSet<codepoint_t>::CloneSimpleCharsTo(ArenaAllocator* allocator, CharSet<char16>& other) const |
| { |
| other.CloneFrom(allocator, this->characterPlanes[0]); |
| } |
| |
| void CharSet<codepoint_t>::SetRange(ArenaAllocator* allocator, Char lc, Char hc) |
| { |
| Assert(lc <= hc); |
| |
| int lowerIndex = this->CharToIndex(lc); |
| int upperIndex = this->CharToIndex(hc); |
| |
| if (lowerIndex == upperIndex) |
| { |
| this->characterPlanes[lowerIndex].SetRange(allocator, this->RemoveOffset(lc), this->RemoveOffset(hc)); |
| } |
| else |
| { |
| // Do the partial ranges |
| char16 partialLower = this->RemoveOffset(lc); |
| char16 partialHigher = this->RemoveOffset(hc); |
| |
| if (partialLower != 0) |
| { |
| this->characterPlanes[lowerIndex].SetRange(allocator, partialLower, Chars<char16>::MaxUChar); |
| lowerIndex++; |
| } |
| |
| for(; lowerIndex < upperIndex; lowerIndex++) |
| { |
| this->characterPlanes[lowerIndex].SetRange(allocator, 0, Chars<char16>::MaxUChar); |
| } |
| |
| this->characterPlanes[upperIndex].SetRange(allocator, 0, partialHigher); |
| } |
| } |
| |
| void CharSet<codepoint_t>::SetRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs) |
| { |
| for (int i = 0; i < numSortedPairs * 2; i += 2) |
| { |
| Assert(i == 0 || sortedPairs[i-1] < sortedPairs[i]); |
| Assert(sortedPairs[i] <= sortedPairs[i+1]); |
| SetRange(allocator, sortedPairs[i], sortedPairs[i+1]); |
| } |
| } |
| void CharSet<codepoint_t>::SetNotRanges(ArenaAllocator* allocator, int numSortedPairs, const Char* sortedPairs) |
| { |
| if (numSortedPairs == 0) |
| { |
| for (int i = 0; i < NumberOfPlanes; i++) |
| { |
| this->characterPlanes[i].SetRange(allocator, 0, Chars<char16>::MaxUChar); |
| } |
| } |
| else |
| { |
| if (sortedPairs[0] != MinChar) |
| { |
| SetRange(allocator, MinChar, sortedPairs[0] - 1); |
| } |
| |
| for (int i = 1; i < numSortedPairs * 2 - 1; i += 2) |
| { |
| SetRange(allocator, sortedPairs[i] + 1, sortedPairs[i+1] - 1); |
| } |
| |
| if (sortedPairs[numSortedPairs * 2 - 1] != MaxChar) |
| { |
| SetRange(allocator, sortedPairs[numSortedPairs * 2 - 1] + 1, MaxChar); |
| } |
| } |
| } |
| void CharSet<codepoint_t>::UnionInPlace(ArenaAllocator* allocator, const CharSet<Char>& other) |
| { |
| for (int i = 0; i < NumberOfPlanes; i++) |
| { |
| this->characterPlanes[i].UnionInPlace(allocator, other.characterPlanes[i]); |
| } |
| } |
| |
| void CharSet<codepoint_t>::UnionInPlace(ArenaAllocator* allocator, const CharSet<char16>& other) |
| { |
| this->characterPlanes[0].UnionInPlace(allocator, other); |
| } |
| |
| _Success_(return) |
| bool CharSet<codepoint_t>::GetNextRange(Char searchCharStart, _Out_ Char *outLowerChar, _Out_ Char *outHigherChar) |
| { |
| Assert(outLowerChar != nullptr); |
| Assert(outHigherChar != nullptr); |
| if (searchCharStart >= 0x110000) |
| { |
| return false; |
| } |
| |
| char16 currentLowChar = 1, currentHighChar = 0; |
| int index = this->CharToIndex(searchCharStart); |
| char16 offsetLessSearchCharStart = this->RemoveOffset(searchCharStart); |
| |
| for (; index < NumberOfPlanes; index++) |
| { |
| if (this->characterPlanes[index].GetNextRange(offsetLessSearchCharStart, ¤tLowChar, ¤tHighChar)) |
| { |
| break; |
| } |
| offsetLessSearchCharStart = 0x0; |
| } |
| |
| if (index == NumberOfPlanes) |
| { |
| return false; |
| } |
| Assert(currentHighChar >= currentLowChar); |
| // else found range |
| *outLowerChar = this->AddOffset(currentLowChar, index); |
| *outHigherChar = this->AddOffset(currentHighChar, index); |
| |
| // Check if range crosses plane boundaries |
| index ++; |
| for (; index < NumberOfPlanes; index++) |
| { |
| if (!this->characterPlanes[index].GetNextRange(0x0, ¤tLowChar, ¤tHighChar) || *outHigherChar + 1 != this->AddOffset(currentLowChar, index)) |
| { |
| break; |
| } |
| Assert(this->AddOffset(currentHighChar, index) > *outHigherChar); |
| *outHigherChar = this->AddOffset(currentHighChar, index); |
| } |
| |
| return true; |
| } |
| |
| void CharSet<codepoint_t>::ToComplement(ArenaAllocator* allocator, CharSet<Char>& result) |
| { |
| for (int i = 0; i < NumberOfPlanes; i++) |
| { |
| this->characterPlanes[i].ToComplement(allocator, result.characterPlanes[i]); |
| } |
| } |
| |
| void CharSet<codepoint_t>::ToSimpleComplement(ArenaAllocator* allocator, CharSet<Char>& result) |
| { |
| this->characterPlanes[0].ToComplement(allocator, result.characterPlanes[0]); |
| } |
| |
| void CharSet<codepoint_t>::ToSimpleComplement(ArenaAllocator* allocator, CharSet<char16>& result) |
| { |
| this->characterPlanes[0].ToComplement(allocator, result); |
| } |
| |
| void CharSet<codepoint_t>::ToEquivClass(ArenaAllocator* allocator, CharSet<Char>& result) |
| { |
| for (int i = 0; i < NumberOfPlanes; i++) |
| { |
| this->characterPlanes[i].ToEquivClassCP(allocator, result, AddOffset(0, i)); |
| } |
| } |
| |
| void CharSet<codepoint_t>::ToSurrogateEquivClass(ArenaAllocator* allocator, CharSet<Char>& result) |
| { |
| this->CloneSimpleCharsTo(allocator, result.characterPlanes[0]); |
| for (int i = 1; i < NumberOfPlanes; i++) |
| { |
| this->characterPlanes[i].ToEquivClassCP(allocator, result, AddOffset(0, i)); |
| } |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| void CharSet<codepoint_t>::Print(DebugWriter* w) const |
| { |
| w->Print(_u("Characters 0 - 65535")); |
| |
| for (int i = 0; i < NumberOfPlanes; i++) |
| { |
| int base = (i + 1) * 0x10000; |
| w->Print(_u("Characters %d - %d"), base, base + 0xFFFF); |
| this->characterPlanes[i].Print(w); |
| } |
| } |
| #endif |
| |
| // ---------------------------------------------------------------------- |
| // RuntimeCharSet<char16> |
| // ---------------------------------------------------------------------- |
| |
| RuntimeCharSet<char16>::RuntimeCharSet() |
| { |
| root = nullptr; |
| direct.Clear(); |
| } |
| |
| void RuntimeCharSet<char16>::FreeBody(ArenaAllocator* allocator) |
| { |
| if (root != nullptr) |
| { |
| root->FreeSelf(allocator); |
| #if DBG |
| root = nullptr; |
| #endif |
| } |
| } |
| |
| void RuntimeCharSet<char16>::CloneFrom(ArenaAllocator* allocator, const CharSet<Char>& other) |
| { |
| Assert(root == nullptr); |
| Assert(direct.Count() == 0); |
| if (other.IsCompact()) |
| { |
| for (uint i = 0; i < other.GetCompactLength(); i++) |
| { |
| uint k = other.GetCompactCharU(i); |
| if (k < CharSetNode::directSize) |
| direct.Set(k); |
| else |
| { |
| if (root == nullptr) |
| root = Anew(allocator, CharSetInner); |
| #if DBG |
| CharSetNode* newRoot = |
| #endif |
| root->Set(allocator, CharSetNode::levels - 1, k, k); |
| #if DBG |
| // NOTE: Since we can add at most MaxCompact characters, we can never fill a leaf or inner node, |
| // thus we will never need to reallocated nodes |
| Assert(newRoot == root); |
| #endif |
| } |
| } |
| } |
| else |
| { |
| root = other.rep.full.root == nullptr ? nullptr : other.rep.full.root->Clone(allocator); |
| direct.CloneFrom(other.rep.full.direct); |
| } |
| } |
| |
| bool RuntimeCharSet<char16>::Get_helper(uint k) const |
| { |
| CharSetNode* curr = root; |
| for (int level = CharSetNode::levels - 1; level > 0; level--) |
| { |
| if (curr == CharSetFull::TheFullNode) |
| return true; |
| CharSetInner* inner = (CharSetInner*)curr; |
| uint i = CharSetNode::innerIdx(level, k); |
| if (inner->children[i] == 0) |
| return false; |
| else |
| curr = inner->children[i]; |
| } |
| if (curr == CharSetFull::TheFullNode) |
| return true; |
| CharSetLeaf* leaf = (CharSetLeaf*)curr; |
| return leaf->vec.Get(CharSetNode::leafIdx(k)); |
| } |
| |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| // CAUTION: This method is very slow. |
| void RuntimeCharSet<char16>::Print(DebugWriter* w) const |
| { |
| w->Print(_u("[")); |
| int start = -1; |
| for (uint i = 0; i < NumChars; i++) |
| { |
| if (Get(UTC(i))) |
| { |
| if (start < 0) |
| { |
| start = i; |
| w->PrintEscapedChar(UTC(i)); |
| } |
| } |
| else |
| { |
| if (start >= 0) |
| { |
| if (i > (uint)(start + 1)) |
| { |
| if (i > (uint)(start + 2)) |
| w->Print(_u("-")); |
| w->PrintEscapedChar(UTC(i - 1)); |
| } |
| start = -1; |
| } |
| } |
| } |
| if (start >= 0) |
| { |
| if ((uint)start < MaxUChar - 1) |
| w->Print(_u("-")); |
| w->PrintEscapedChar(MaxChar); |
| } |
| w->Print(_u("]")); |
| } |
| #endif |
| |
| // The VS2013 linker treats this as a redefinition of an already |
| // defined constant and complains. So skip the declaration if we're compiling |
| // with VS2013 or below. |
| #if !defined(_MSC_VER) || _MSC_VER >= 1900 |
| const int CharSetNode::directBits; |
| const uint CharSetNode::directSize; |
| const uint CharSetNode::innerMask; |
| const int CharSetNode::bitsPerLeafLevel; |
| const int CharSetNode::branchingPerLeafLevel; |
| const uint CharSetNode::leafMask; |
| const uint CharSetNode::levels; |
| #endif |
| } |