| //------------------------------------------------------------------------------------------------------- |
| // Copyright (C) Microsoft. All rights reserved. |
| // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. |
| //------------------------------------------------------------------------------------------------------- |
| #include "ParserPch.h" |
| |
| /***************************************************************************** |
| * |
| * The following table speeds various tests of characters, such as whether |
| * a given character can be part of an identifier, and so on. |
| */ |
| |
| int CountNewlines(LPCOLESTR psz) |
| { |
| int cln = 0; |
| |
| while (0 != *psz) |
| { |
| switch (*psz++) |
| { |
| case _u('\xD'): |
| if (*psz == _u('\xA')) |
| { |
| ++psz; |
| } |
| // fall-through |
| case _u('\xA'): |
| cln++; |
| break; |
| } |
| } |
| |
| return cln; |
| } |
| |
| BOOL Token::IsKeyword() const |
| { |
| // keywords (but not future reserved words) |
| return (tk <= tkYIELD); |
| } |
| |
| tokens Token::SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser) |
| { |
| Assert(parser); |
| |
| if(pattern) |
| parser->RegisterRegexPattern(pattern); |
| this->u.pattern = pattern; |
| return tk = tkRegExp; |
| } |
| |
| IdentPtr Token::CreateIdentifier(HashTbl * hashTbl) |
| { |
| Assert(this->u.pid == nullptr); |
| if (this->u.pchMin) |
| { |
| Assert(IsIdentifier()); |
| IdentPtr pid = hashTbl->PidHashNameLen(this->u.pchMin, this->u.pchMin + this->u.length, this->u.length); |
| this->u.pid = pid; |
| return pid; |
| } |
| |
| Assert(IsReservedWord()); |
| |
| IdentPtr pid = hashTbl->PidFromTk(tk); |
| this->u.pid = pid; |
| return pid; |
| } |
| |
| template <typename EncodingPolicy> |
| Scanner<EncodingPolicy>::Scanner(Parser* parser, Token *ptoken, Js::ScriptContext* scriptContext) |
| { |
| Assert(ptoken); |
| m_parser = parser; |
| m_ptoken = ptoken; |
| m_scriptContext = scriptContext; |
| |
| m_tempChBuf.m_pscanner = this; |
| m_tempChBufSecondary.m_pscanner = this; |
| |
| this->charClassifier = scriptContext->GetCharClassifier(); |
| this->es6UnicodeMode = scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled(); |
| |
| ClearStates(); |
| } |
| |
| template <typename EncodingPolicy> |
| Scanner<EncodingPolicy>::~Scanner(void) |
| { |
| } |
| |
| template <typename EncodingPolicy> |
| void Scanner<EncodingPolicy>::ClearStates() |
| { |
| m_pchBase = nullptr; |
| m_pchLast = nullptr; |
| m_pchMinLine = nullptr; |
| m_pchMinTok = nullptr; |
| m_currentCharacter = nullptr; |
| m_pchPrevLine = nullptr; |
| |
| m_cMinTokMultiUnits = 0; |
| m_cMinLineMultiUnits = 0; |
| |
| m_fStringTemplateDepth = 0; |
| |
| m_fHadEol = FALSE; |
| m_fIsModuleCode = FALSE; |
| m_doubleQuoteOnLastTkStrCon = FALSE; |
| m_OctOrLeadingZeroOnLastTKNumber = false; |
| m_EscapeOnLastTkStrCon = false; |
| m_fNextStringTemplateIsTagged = false; |
| m_DeferredParseFlags = ScanFlagNone; |
| |
| m_fYieldIsKeywordRegion = false; |
| m_fAwaitIsKeywordRegion = false; |
| |
| m_line = 0; |
| m_scanState = ScanStateNormal; |
| |
| m_ichMinError = 0; |
| m_ichLimError = 0; |
| |
| m_startLine = 0; |
| m_pchStartLine = NULL; |
| |
| m_iecpLimTokPrevious = (size_t)-1; |
| m_ichLimTokPrevious = (charcount_t)-1; |
| } |
| |
| template <typename EncodingPolicy> |
| void Scanner<EncodingPolicy>::Clear() |
| { |
| EncodingPolicy::Clear(); |
| ClearStates(); |
| this->m_tempChBuf.Clear(); |
| this->m_tempChBufSecondary.Clear(); |
| } |
| |
| /***************************************************************************** |
| * |
| * Initializes the scanner to prepare to scan the given source text. |
| */ |
| template <typename EncodingPolicy> |
| void Scanner<EncodingPolicy>::SetText(EncodedCharPtr pszSrc, size_t offset, size_t length, charcount_t charOffset, bool isUtf8, ULONG grfscr, ULONG lineNumber) |
| { |
| // Save the start of the script and add the offset to get the point where we should start scanning. |
| m_pchBase = pszSrc; |
| m_pchLast = m_pchBase + offset + length; |
| m_pchPrevLine = m_currentCharacter = m_pchMinLine = m_pchMinTok = pszSrc + offset; |
| |
| this->RestoreMultiUnits(offset - charOffset); |
| |
| // Absorb any byte order mark at the start |
| if(offset == 0) |
| { |
| switch( this->PeekFull(m_currentCharacter, m_pchLast) ) |
| { |
| case 0xFFEE: // "Opposite" endian BOM |
| // We do not support big-endian encodings |
| // fall-through |
| |
| case 0xFEFF: // "Correct" BOM |
| this->template ReadFull<true>(m_currentCharacter, m_pchLast); |
| break; |
| } |
| } |
| |
| m_line = lineNumber; |
| m_startLine = lineNumber; |
| m_pchStartLine = m_currentCharacter; |
| m_ptoken->tk = tkNone; |
| m_fIsModuleCode = (grfscr & fscrIsModuleCode) != 0; |
| m_fHadEol = FALSE; |
| m_DeferredParseFlags = ScanFlagNone; |
| |
| this->SetIsUtf8(isUtf8); |
| } |
| |
#if ENABLE_BACKGROUND_PARSING
// Requests the standard-character tables for both the encoded character type
// and char16 from the thread context, discarding the results.
// NOTE(review): presumably this forces the tables to exist before a
// background parse needs them - confirm against ThreadContext::GetStandardChars.
template <typename EncodingPolicy>
void Scanner<EncodingPolicy>::PrepareForBackgroundParse(Js::ScriptContext *scriptContext)
{
    ThreadContext *const threadContext = scriptContext->GetThreadContext();

    threadContext->GetStandardChars(static_cast<EncodedChar*>(nullptr));
    threadContext->GetStandardChars(static_cast<char16*>(nullptr));
}
#endif
| |
| //----------------------------------------------------------------------------- |
| // Number of code points from 'first' up to, but not including the next |
| // newline character, embedded NUL, or 'last', depending on which comes first. |
| // |
| // This is used to determine a length of BSTR, which can't contain a NUL character. |
| //----------------------------------------------------------------------------- |
| template <typename EncodingPolicy> |
| charcount_t Scanner<EncodingPolicy>::LineLength(EncodedCharPtr first, EncodedCharPtr last, size_t* cb) |
| { |
| Assert(cb != nullptr); |
| |
| charcount_t result = 0; |
| EncodedCharPtr p = first; |
| |
| for (;;) |
| { |
| EncodedCharPtr prev = p; |
| switch( this->template ReadFull<false>(p, last) ) |
| { |
| case kchNWL: // _C_NWL |
| case kchRET: |
| case kchLS: |
| case kchPS: |
| case kchNUL: // _C_NUL |
| // p is now advanced past the line terminator character. |
| // We need to know the number of bytes making up the line, not including the line terminator character. |
| // To avoid subtracting a variable number of bytes because the line terminator characters are different |
| // number of bytes long (plus there may be multiple valid encodings for these characters) just keep |
| // track of the first byte of the line terminator character in prev. |
| Assert(prev >= first); |
| *cb = prev - first; |
| return result; |
| } |
| result++; |
| } |
| } |
| |
| template <typename EncodingPolicy> |
| charcount_t Scanner<EncodingPolicy>::UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd) |
| { |
| EncodedCharPtr p = start; |
| charcount_t ich = ichStart; |
| int32 current = line; |
| charcount_t lastStart = ichStart; |
| |
| while (ich < ichEnd) |
| { |
| ich++; |
| switch (this->template ReadFull<false>(p, last)) |
| { |
| case kchRET: |
| if (this->PeekFull(p, last) == kchNWL) |
| { |
| ich++; |
| this->template ReadFull<false>(p, last); |
| } |
| // fall-through |
| |
| case kchNWL: |
| case kchLS: |
| case kchPS: |
| current++; |
| lastStart = ich; |
| break; |
| |
| case kchNUL: |
| goto done; |
| } |
| } |
| |
| done: |
| line = current; |
| return lastStart; |
| } |
| |
| template <typename EncodingPolicy> |
| bool Scanner<EncodingPolicy>::TryReadEscape(EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar) |
| { |
| Assert(outChar != nullptr); |
| Assert(startingLocation <= endOfSource); |
| |
| EncodedCharPtr currentLocation = startingLocation; |
| codepoint_t charToOutput = 0x0; |
| |
| // '\' is Assumed as there is only one caller |
| // Read 'u' characters |
| if (currentLocation >= endOfSource || this->ReadFirst(currentLocation, endOfSource) != 'u') |
| { |
| return false; |
| } |
| |
| bool expectCurly = false; |
| |
| if (currentLocation < endOfSource && this->PeekFirst(currentLocation, endOfSource) == '{' && es6UnicodeMode) |
| { |
| expectCurly = true; |
| // Move past the character |
| this->ReadFirst(currentLocation, endOfSource); |
| } |
| |
| uint i = 0; |
| OLECHAR ch = 0; |
| int hexValue = 0; |
| uint maxHexDigits = (expectCurly ? MAXUINT32 : 4u); |
| |
| for(; i < maxHexDigits && currentLocation < endOfSource; i++) |
| { |
| if (!Js::NumberUtilities::FHexDigit(ch = this->ReadFirst(currentLocation, endOfSource), &hexValue)) |
| { |
| break; |
| } |
| |
| charToOutput = charToOutput * 0x10 + hexValue; |
| |
| if (charToOutput > 0x10FFFF) |
| { |
| return false; |
| } |
| } |
| |
| //At least 4 characters have to be read |
| if (i == 0 || (i != 4 && !expectCurly)) |
| { |
| return false; |
| } |
| |
| Assert(expectCurly ? es6UnicodeMode : true); |
| |
| if (expectCurly && ch != '}') |
| { |
| return false; |
| } |
| |
| *outChar = charToOutput; |
| startingLocation = currentLocation; |
| return true; |
| } |
| |
| template <typename EncodingPolicy> |
| template <bool bScan> |
| bool Scanner<EncodingPolicy>::TryReadCodePointRest(codepoint_t lower, EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar) |
| |
| { |
| Assert(outChar != nullptr); |
| Assert(outContainsMultiUnitChar != nullptr); |
| Assert(es6UnicodeMode); |
| Assert(Js::NumberUtilities::IsSurrogateLowerPart(lower)); |
| |
| EncodedCharPtr currentLocation = startingLocation; |
| *outChar = lower; |
| |
| if (currentLocation < endOfSource) |
| { |
| size_t restorePoint = this->m_cMultiUnits; |
| codepoint_t upper = this->template ReadFull<bScan>(currentLocation, endOfSource); |
| |
| if (Js::NumberUtilities::IsSurrogateUpperPart(upper)) |
| { |
| *outChar = Js::NumberUtilities::SurrogatePairAsCodePoint(lower, upper); |
| |
| if (this->IsMultiUnitChar(static_cast<OLECHAR>(upper))) |
| { |
| *outContainsMultiUnitChar = true; |
| } |
| |
| startingLocation = currentLocation; |
| } |
| else |
| { |
| this->RestoreMultiUnits(restorePoint); |
| } |
| } |
| |
| return true; |
| } |
| |
| template <typename EncodingPolicy> |
| template <bool bScan> |
| inline bool Scanner<EncodingPolicy>::TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar) |
| { |
| Assert(outChar != nullptr); |
| Assert(outContainsMultiUnitChar != nullptr); |
| |
| if (startingLocation >= endOfSource) |
| { |
| return false; |
| } |
| |
| codepoint_t ch = this->template ReadFull<bScan>(startingLocation, endOfSource); |
| if (FBigChar(ch)) |
| { |
| if (this->IsMultiUnitChar(static_cast<OLECHAR>(ch))) |
| { |
| *outContainsMultiUnitChar = true; |
| } |
| |
| if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch)) |
| { |
| return TryReadCodePointRest<bScan>(ch, startingLocation, endOfSource, outChar, outContainsMultiUnitChar); |
| } |
| } |
| else if (ch == '\\' && TryReadEscape(startingLocation, endOfSource, &ch)) |
| { |
| *hasEscape = true; |
| } |
| |
| *outChar = ch; |
| return true; |
| } |
| |
| template <typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp) |
| { |
| EncodedCharPtr p = *pp; |
| EncodedCharPtr pchMin = p; |
| |
| // JS6 allows unicode characters in the form of \uxxxx escape sequences |
| // to be part of the identifier. |
| bool fHasEscape = false; |
| bool fHasMultiChar = false; |
| |
| codepoint_t codePoint = INVALID_CODEPOINT; |
| size_t multiUnitsBeforeLast = this->m_cMultiUnits; |
| |
| // Check if we started the id |
| if (!TryReadCodePoint<true>(p, m_pchLast, &codePoint, &fHasEscape, &fHasMultiChar)) |
| { |
| // If no chars. could be scanned as part of the identifier, return error. |
| return tkScanError; |
| } |
| |
| Assert(codePoint < 0x110000u); |
| if (!charClassifier->IsIdStart(codePoint)) |
| { |
| // Put back the last character |
| this->RestoreMultiUnits(multiUnitsBeforeLast); |
| |
| // If no chars. could be scanned as part of the identifier, return error. |
| return tkScanError; |
| } |
| |
| return ScanIdentifierContinue(identifyKwds, fHasEscape, fHasMultiChar, pchMin, p, pp); |
| } |
| |
| template <typename EncodingPolicy> |
| BOOL Scanner<EncodingPolicy>::FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last) |
| { |
| if (EncodingPolicy::MultiUnitEncoding) |
| { |
| while (p < last) |
| { |
| EncodedChar currentChar = *p; |
| if (this->IsMultiUnitChar(currentChar)) |
| { |
| // multi unit character, we may not have reach the end yet |
| return FALSE; |
| } |
| Assert(currentChar != '\\' || !charClassifier->IsIdContinueFast<false>(currentChar)); |
| if (!charClassifier->IsIdContinueFast<false>(currentChar)) |
| { |
| // only reach the end of the identifier if it is not the start of an escape sequence |
| return currentChar != '\\'; |
| } |
| p++; |
| } |
| // We have reach the end of the identifier. |
| return TRUE; |
| } |
| |
| // Not fast path for non multi unit encoding |
| return false; |
| } |
| |
| template <typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar, |
| EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp) |
| { |
| EncodedCharPtr last = m_pchLast; |
| |
| while (true) |
| { |
| // Fast path for utf8, non-multi unit char and not escape |
| if (FastIdentifierContinue(p, last)) |
| { |
| break; |
| } |
| |
| // Slow path that has to deal with multi unit encoding |
| codepoint_t codePoint = INVALID_CODEPOINT; |
| EncodedCharPtr pchBeforeLast = p; |
| size_t multiUnitsBeforeLast = this->m_cMultiUnits; |
| if (TryReadCodePoint<true>(p, last, &codePoint, &fHasEscape, &fHasMultiChar)) |
| { |
| Assert(codePoint < 0x110000u); |
| if (charClassifier->IsIdContinue(codePoint)) |
| { |
| continue; |
| } |
| } |
| |
| // Put back the last character |
| p = pchBeforeLast; |
| this->RestoreMultiUnits(multiUnitsBeforeLast); |
| break; |
| } |
| |
| Assert(p - pchMin > 0 && p - pchMin <= LONG_MAX); |
| |
| *pp = p; |
| |
| if (!identifyKwds) |
| { |
| return tkID; |
| } |
| |
| // UTF16 Scanner are only for syntax coloring, so it shouldn't come here. |
| if (EncodingPolicy::MultiUnitEncoding && !fHasMultiChar && !fHasEscape) |
| { |
| Assert(sizeof(EncodedChar) == 1); |
| |
| // If there are no escape, that the main scan loop would have found the keyword already |
| // So we can just assume it is an ID |
| DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p)); |
| DebugOnly(tokens tk = Ident::TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode())); |
| Assert(tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword())); |
| |
| m_ptoken->SetIdentifier(reinterpret_cast<const char *>(pchMin), (int32)(p - pchMin)); |
| return tkID; |
| } |
| |
| IdentPtr pid = PidOfIdentiferAt(pchMin, p, fHasEscape, fHasMultiChar); |
| m_ptoken->SetIdentifier(pid); |
| |
| if (!fHasEscape) |
| { |
| // If it doesn't have escape, then Scan() should have taken care of keywords (except |
| // yield if m_fYieldIsKeyword is false, in which case yield is treated as an identifier, and except |
| // await if m_fAwaitIsKeyword is false, in which case await is treated as an identifier). |
| // We don't have to check if the name is reserved word and return it as an Identifier |
| Assert(pid->Tk(IsStrictMode()) == tkID |
| || (pid->Tk(IsStrictMode()) == tkYIELD && !this->YieldIsKeyword()) |
| || (pid->Tk(IsStrictMode()) == tkAWAIT && !this->AwaitIsKeyword())); |
| return tkID; |
| } |
| tokens tk = pid->Tk(IsStrictMode()); |
| return tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword()) ? tkID : tkNone; |
| } |
| |
| template <typename EncodingPolicy> |
| IdentPtr Scanner<EncodingPolicy>::PidAt(size_t iecpMin, size_t iecpLim) |
| { |
| Assert(iecpMin < AdjustedLength() && iecpLim <= AdjustedLength() && iecpLim > iecpMin); |
| return PidOfIdentiferAt(m_pchBase + iecpMin, m_pchBase + iecpLim); |
| } |
| |
| template <typename EncodingPolicy> |
| uint32 Scanner<EncodingPolicy>::UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last) |
| { |
| m_tempChBuf.Reset(); |
| while( p < last ) |
| { |
| codepoint_t codePoint; |
| bool hasEscape, isMultiChar; |
| bool gotCodePoint = TryReadCodePoint<false>(p, last, &codePoint, &hasEscape, &isMultiChar); |
| Assert(gotCodePoint); |
| Assert(codePoint < 0x110000); |
| if (codePoint < 0x10000) |
| { |
| m_tempChBuf.AppendCh((OLECHAR)codePoint); |
| } |
| else |
| { |
| char16 lower, upper; |
| Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &upper); |
| m_tempChBuf.AppendCh(lower); |
| m_tempChBuf.AppendCh(upper); |
| } |
| } |
| return m_tempChBuf.m_ichCur; |
| } |
| |
| template <typename EncodingPolicy> |
| IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last) |
| { |
| int32 cch = UnescapeToTempBuf(p, last); |
| return this->GetHashTbl()->PidHashNameLen(m_tempChBuf.m_prgch, cch); |
| } |
| |
| template <typename EncodingPolicy> |
| IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar) |
| { |
| // If there is an escape sequence in the JS6 identifier or it is a UTF8 |
| // source then we have to convert it to the equivalent char so we use a |
| // buffer for translation. |
| if ((EncodingPolicy::MultiUnitEncoding && fHasMultiChar) || fHadEscape) |
| { |
| return PidOfIdentiferAt(p, last); |
| } |
| else if (EncodingPolicy::MultiUnitEncoding) |
| { |
| Assert(sizeof(EncodedChar) == 1); |
| return this->GetHashTbl()->PidHashNameLen(reinterpret_cast<const char *>(p), reinterpret_cast<const char *>(last), (int32)(last - p)); |
| } |
| else |
| { |
| Assert(sizeof(EncodedChar) == 2); |
| return this->GetHashTbl()->PidHashNameLen(reinterpret_cast< const char16 * >(p), (int32)(last - p)); |
| } |
| } |
| |
| template <typename EncodingPolicy> |
| typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, LikelyNumberType& likelyType, size_t savedMultiUnits) |
| { |
| EncodedCharPtr last = m_pchLast; |
| EncodedCharPtr pchT = nullptr; |
| bool baseSpecified = false; |
| likelyType = LikelyNumberType::Int; |
| // Reset |
| m_OctOrLeadingZeroOnLastTKNumber = false; |
| |
| auto baseSpecifierCheck = [&pchT, &pdbl, p, &baseSpecified]() |
| { |
| if (pchT == p + 2) |
| { |
| // An octal token '0' was followed by a base specifier: /0[xXoObB]/ |
| // This literal can no longer be a double |
| *pdbl = 0; |
| // Advance the character pointer to the base specifier |
| pchT = p + 1; |
| // Set the flag so we know to offset the potential identifier search after the literal |
| baseSpecified = true; |
| } |
| }; |
| |
| if ('0' == this->PeekFirst(p, last)) |
| { |
| switch(this->PeekFirst(p + 1, last)) |
| { |
| case '.': |
| case 'e': |
| case 'E': |
| case 'n': |
| likelyType = LikelyNumberType::Double; |
| // Floating point |
| goto LFloat; |
| |
| case 'x': |
| case 'X': |
| // Hex |
| *pdbl = Js::NumberUtilities::DblFromHex(p + 2, &pchT); |
| baseSpecifierCheck(); |
| goto LIdCheck; |
| case 'o': |
| case 'O': |
| // Octal |
| *pdbl = Js::NumberUtilities::DblFromOctal(p + 2, &pchT); |
| baseSpecifierCheck(); |
| goto LIdCheck; |
| |
| case 'b': |
| case 'B': |
| // Binary |
| *pdbl = Js::NumberUtilities::DblFromBinary(p + 2, &pchT); |
| baseSpecifierCheck(); |
| goto LIdCheck; |
| |
| default: |
| // Octal |
| *pdbl = Js::NumberUtilities::DblFromOctal(p, &pchT); |
| Assert(pchT > p); |
| |
| #if !SOURCERELEASE |
| // If an octal literal is malformed then it is in fact a decimal literal. |
| #endif // !SOURCERELEASE |
| if(*pdbl != 0 || pchT > p + 1) |
| m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok |
| |
| switch (*pchT) |
| { |
| case '8': |
| case '9': |
| // case 'e': |
| // case 'E': |
| // case '.': |
| m_OctOrLeadingZeroOnLastTKNumber = false; //08... or 09.... |
| goto LFloat; |
| } |
| goto LIdCheck; |
| } |
| } |
| else |
| { |
| LFloat: |
| *pdbl = Js::NumberUtilities::StrToDbl(p, &pchT, likelyType, m_scriptContext->GetConfig()->IsESBigIntEnabled()); |
| Assert(pchT == p || !Js::NumberUtilities::IsNan(*pdbl)); |
| if (likelyType == LikelyNumberType::BigInt) |
| { |
| Assert(*pdbl == 0); |
| } |
| // fall through to LIdCheck |
| } |
| |
| LIdCheck: |
| // https://tc39.github.io/ecma262/#sec-literals-numeric-literals |
| // The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit. |
| // For example : 3in is an error and not the two input elements 3 and in |
| // If a base was speficied, use the first character denoting the constant. In this case, pchT is pointing to the base specifier. |
| EncodedCharPtr startingLocation = baseSpecified ? pchT + 1 : pchT; |
| codepoint_t outChar = *startingLocation; |
| if (this->IsMultiUnitChar((OLECHAR)outChar)) |
| { |
| outChar = this->template ReadRest<true>((OLECHAR)outChar, startingLocation, last); |
| } |
| if (this->charClassifier->IsIdStart(outChar)) |
| { |
| this->RestoreMultiUnits(savedMultiUnits); |
| Error(ERRIdAfterLit); |
| } |
| |
| // IsIdStart does not cover the unicode escape case. Try to read a unicode escape from the 'u' char. |
| if (*pchT == '\\') |
| { |
| startingLocation++; // TryReadEscape expects us to point to the 'u', and since it is by reference we need to do it beforehand. |
| if (TryReadEscape(startingLocation, m_pchLast, &outChar)) |
| { |
| this->RestoreMultiUnits(savedMultiUnits); |
| Error(ERRIdAfterLit); |
| } |
| } |
| |
| if (Js::NumberUtilities::IsDigit(*startingLocation)) |
| { |
| this->RestoreMultiUnits(savedMultiUnits); |
| Error(ERRbadNumber); |
| } |
| |
| return pchT; |
| } |
| |
| template <typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::TryRescanRegExp() |
| { |
| EncodedCharPtr current = m_currentCharacter; |
| tokens result = RescanRegExp(); |
| if (result == tkScanError) |
| m_currentCharacter = current; |
| return result; |
| } |
| |
| template <typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::RescanRegExp() |
| { |
| #if DEBUG |
| switch (m_ptoken->tk) |
| { |
| case tkDiv: |
| Assert(m_currentCharacter == m_pchMinTok + 1); |
| break; |
| case tkAsgDiv: |
| Assert(m_currentCharacter == m_pchMinTok + 2); |
| break; |
| default: |
| AssertMsg(FALSE, "Who is calling RescanRegExp?"); |
| break; |
| } |
| #endif //DEBUG |
| |
| m_currentCharacter = m_pchMinTok; |
| if (*m_currentCharacter != '/') |
| Error(ERRnoSlash); |
| m_currentCharacter++; |
| |
| tokens tk = tkNone; |
| |
| { |
| ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc); |
| tk = ScanRegExpConstant(&alloc); |
| } |
| |
| return tk; |
| } |
| |
| template <typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::RescanRegExpNoAST() |
| { |
| #if DEBUG |
| switch (m_ptoken->tk) |
| { |
| case tkDiv: |
| Assert(m_currentCharacter == m_pchMinTok + 1); |
| break; |
| case tkAsgDiv: |
| Assert(m_currentCharacter == m_pchMinTok + 2); |
| break; |
| default: |
| AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?"); |
| break; |
| } |
| #endif //DEBUG |
| |
| m_currentCharacter = m_pchMinTok; |
| if (*m_currentCharacter != '/') |
| Error(ERRnoSlash); |
| m_currentCharacter++; |
| |
| tokens tk = tkNone; |
| |
| { |
| ArenaAllocator alloc(_u("RescanRegExp"), m_parser->GetAllocator()->GetPageAllocator(), m_parser->GetAllocator()->outOfMemoryFunc); |
| { |
| tk = ScanRegExpConstantNoAST(&alloc); |
| } |
| } |
| |
| return tk; |
| } |
| |
| template <typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::RescanRegExpTokenizer() |
| { |
| #if DEBUG |
| switch (m_ptoken->tk) |
| { |
| case tkDiv: |
| Assert(m_currentCharacter == m_pchMinTok + 1); |
| break; |
| case tkAsgDiv: |
| Assert(m_currentCharacter == m_pchMinTok + 2); |
| break; |
| default: |
| AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?"); |
| break; |
| } |
| #endif //DEBUG |
| |
| m_currentCharacter = m_pchMinTok; |
| if (*m_currentCharacter != '/') |
| Error(ERRnoSlash); |
| m_currentCharacter++; |
| |
| tokens tk = tkNone; |
| |
| ThreadContext *threadContext = ThreadContext::GetContextForCurrentThread(); |
| threadContext->EnsureRecycler(); |
| Js::TempArenaAllocatorObject *alloc = threadContext->GetTemporaryAllocator(_u("RescanRegExp")); |
| TryFinally( |
| [&]() /* try block */ |
| { |
| tk = this->ScanRegExpConstantNoAST(alloc->GetAllocator()); |
| }, |
| [&](bool /* hasException */) /* finally block */ |
| { |
| threadContext->ReleaseTemporaryAllocator(alloc); |
| }); |
| |
| return tk; |
| } |
| |
| template <typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::ScanRegExpConstant(ArenaAllocator* alloc) |
| { |
| PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex); |
| |
| // SEE ALSO: RegexHelper::PrimCompileDynamic() |
| |
| #ifdef PROFILE_EXEC |
| m_scriptContext->ProfileBegin(Js::RegexCompilePhase); |
| #endif |
| ArenaAllocator* ctAllocator = alloc; |
| UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = m_scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0); |
| UnifiedRegex::StandardChars<char16>* standardChars = m_scriptContext->GetThreadContext()->GetStandardChars((char16*)0); |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| UnifiedRegex::DebugWriter *w = 0; |
| if (REGEX_CONFIG_FLAG(RegexDebug)) |
| w = m_scriptContext->GetRegexDebugWriter(); |
| if (REGEX_CONFIG_FLAG(RegexProfile)) |
| m_scriptContext->GetRegexStatsDatabase()->BeginProfile(); |
| #endif |
| UnifiedRegex::Node* root = 0; |
| charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0; |
| UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags; |
| UnifiedRegex::Parser<EncodingPolicy, true> parser |
| ( m_scriptContext |
| , ctAllocator |
| , standardEncodedChars |
| , standardChars |
| , this->IsUtf8() |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| , w |
| #endif |
| ); |
| try |
| { |
| root = parser.ParseLiteral(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars, flags); |
| } |
| catch (UnifiedRegex::ParseError e) |
| { |
| #ifdef PROFILE_EXEC |
| m_scriptContext->ProfileEnd(Js::RegexCompilePhase); |
| #endif |
| m_currentCharacter += e.encodedPos; |
| Error(e.error); |
| } |
| |
| UnifiedRegex::RegexPattern* pattern; |
| if (m_parser->IsBackgroundParser()) |
| { |
| // Avoid allocating pattern from recycler on background thread. The main thread will create the pattern |
| // and hook it to this parse node. |
| pattern = parser.template CompileProgram<false>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags); |
| } |
| else |
| { |
| pattern = parser.template CompileProgram<true>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags); |
| } |
| this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits |
| |
| return m_ptoken->SetRegex(pattern, m_parser); |
| } |
| |
| template<typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::ScanRegExpConstantNoAST(ArenaAllocator* alloc) |
| { |
| PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex); |
| |
| ThreadContext *threadContext = m_scriptContext->GetThreadContext(); |
| UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = threadContext->GetStandardChars((EncodedChar*)0); |
| UnifiedRegex::StandardChars<char16>* standardChars = threadContext->GetStandardChars((char16*)0); |
| charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0; |
| UnifiedRegex::Parser<EncodingPolicy, true> parser |
| ( m_scriptContext |
| , alloc |
| , standardEncodedChars |
| , standardChars |
| , this->IsUtf8() |
| #if ENABLE_REGEX_CONFIG_OPTIONS |
| , 0 |
| #endif |
| ); |
| try |
| { |
| parser.ParseLiteralNoAST(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars); |
| } |
| catch (UnifiedRegex::ParseError e) |
| { |
| m_currentCharacter += e.encodedPos; |
| Error(e.error); |
| // never reached |
| } |
| |
| UnifiedRegex::RegexPattern* pattern = parser.template CompileProgram<false>(nullptr, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, UnifiedRegex::NoRegexFlags); |
| Assert(pattern == nullptr); // BuildAST == false, CompileProgram should return nullptr |
| this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits |
| |
| return (m_ptoken->tk = tkRegExp); |
| |
| } |
| |
| template<typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::ScanStringTemplateBegin(EncodedCharPtr *pp) |
| { |
| // String template must begin with a string constant followed by '`' or '${' |
| ScanStringConstant<true, true>('`', pp); |
| |
| OLECHAR ch; |
| EncodedCharPtr last = m_pchLast; |
| |
| ch = this->ReadFirst(*pp, last); |
| |
| if (ch == '`') |
| { |
| // Simple string template - no substitutions |
| return tkStrTmplBasic; |
| } |
| else if (ch == '$') |
| { |
| ch = this->ReadFirst(*pp, last); |
| |
| if (ch == '{') |
| { |
| // Next token after expr should be tkStrTmplMid or tkStrTmplEnd. |
| // In string template scanning mode, we expect the next char to be '}' |
| // and will treat it as the beginning of tkStrTmplEnd or tkStrTmplMid |
| m_fStringTemplateDepth++; |
| |
| // Regular string template begin - next is first substitution |
| return tkStrTmplBegin; |
| } |
| } |
| |
| // Error - make sure pointer stays at the last character of the error token instead of after it in the error case |
| (*pp)--; |
| return ScanError(m_currentCharacter, tkStrTmplBegin); |
| } |
| |
| template<typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp) |
| { |
| // String template middle and end tokens must begin with a string constant |
| ScanStringConstant<true, true>('`', pp); |
| |
| OLECHAR ch; |
| EncodedCharPtr last = m_pchLast; |
| |
| ch = this->ReadFirst(*pp, last); |
| |
| if (ch == '`') |
| { |
| // No longer in string template scanning mode |
| m_fStringTemplateDepth--; |
| |
| // This is the last part of the template ...` |
| return tkStrTmplEnd; |
| } |
| else if (ch == '$') |
| { |
| ch = this->ReadFirst(*pp, last); |
| |
| if (ch == '{') |
| { |
| // This is just another middle part of the template }...${ |
| return tkStrTmplMid; |
| } |
| } |
| |
| // Error - make sure pointer stays at the last character of the error token instead of after it in the error case |
| (*pp)--; |
| return ScanError(m_currentCharacter, tkStrTmplEnd); |
| } |
| |
| /***************************************************************************** |
| * |
| * Parses a string constant. Note that the string value is stored in |
| * a volatile buffer (or allocated on the heap if too long), and thus |
| * the string should be saved off before the next token is scanned. |
| * |
| * Template parameters (the static_assert below requires both to be equal): |
| * stringTemplateMode - the text being scanned is a piece of a template |
| * literal. The scan stops in front of a backquote, or in front of a |
| * dollar sign that is followed by an open curly, without consuming it. |
| * Line terminators are accepted, and CR and CR LF are normalized to LF. |
| * Octal escapes are rejected with ERRES5NoOctal. |
| * createRawString - gates the appends to m_tempChBufSecondary, which |
| * collects the raw (unescaped) spelling next to the cooked value that |
| * is collected in m_tempChBuf. |
| * |
| * Parameters: |
| * delim - the quote character that ends the literal when it is read again. |
| * pp - in/out scan position. It is read on entry and the position reached |
| * by the scan is written back on the normal exit path. |
| * |
| * Returns tkStrCon. On the way out the token identifier is set to a PID |
| * built from m_tempChBuf, or to NULL when ScanFlagSuppressStrPid is set and |
| * the cooked text is not the use strict directive. m_scanState is reset to |
| * ScanStateNormal and m_doubleQuoteOnLastTkStrCon records whether delim was |
| * a double quote. |
| * |
| * Failures (unterminated string, bad hex digit, bad code point, missing |
| * curly, octal escape in a template) set m_currentCharacter and call Error. |
| * NOTE(review): Error presumably does not return, because the code that |
| * follows several of the calls falls into the next case label - confirm. |
| */ |
| |
| template<typename EncodingPolicy> |
| template<bool stringTemplateMode, bool createRawString> |
| tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp) |
| { |
| static_assert((stringTemplateMode && createRawString) || (!stringTemplateMode && !createRawString), "stringTemplateMode and createRawString must have the same value"); |
| |
| OLECHAR ch, c, rawch; |
| int wT; |
| EncodedCharPtr p = *pp; |
| EncodedCharPtr last = m_pchLast; |
| |
| // Reset the per-token flags that the escape handling below may set |
| m_OctOrLeadingZeroOnLastTKNumber = false; |
| m_EscapeOnLastTkStrCon = FALSE; |
| |
| m_tempChBuf.Reset(); |
| |
| // Use template parameter to gate raw string creation. |
| // If createRawString is false, all these operations should be no-ops |
| if (createRawString) |
| { |
| m_tempChBufSecondary.Reset(); |
| } |
| |
| for (;;) |
| { |
| switch ((rawch = ch = this->ReadFirst(p, last))) |
| { |
| case kchRET: |
| if (stringTemplateMode) |
| { |
| if (this->PeekFirst(p, last) == kchNWL) |
| { |
| // Eat the <LF> char, ignore return |
| this->ReadFirst(p, last); |
| } |
| |
| // Both <CR> and <CR><LF> are normalized to <LF> in template cooked and raw values |
| ch = rawch = kchNWL; |
| } |
| |
| LEcmaLineBreak: |
| // Fall through |
| case kchNWL: |
| if (stringTemplateMode) |
| { |
| // Notify the scanner to update current line, number of lines etc |
| NotifyScannedNewLine(); |
| |
| // We haven't updated m_currentCharacter yet, so make sure the MinLine info is correct in case we error out. |
| m_pchMinLine = p; |
| |
| break; |
| } |
| |
| m_currentCharacter = p - 1; |
| Error(ERRnoStrEnd); |
| |
| case '"': |
| case '\'': |
| if (ch == delim) |
| goto LBreak; |
| break; |
| |
| case '`': |
| // In string template scan mode, don't consume the '`' - we need to differentiate |
| // between a closed string template and the expression open sequence - ${ |
| if (stringTemplateMode) |
| { |
| p--; |
| goto LBreak; |
| } |
| |
| // If we aren't scanning for a string template, do the default thing |
| goto LMainDefault; |
| |
| case '$': |
| // If we are parsing a string literal part of a string template, ${ indicates we need to switch |
| // to parsing an expression. |
| if (stringTemplateMode && this->PeekFirst(p, last) == '{') |
| { |
| // Rewind to the $ and return |
| p--; |
| goto LBreak; |
| } |
| |
| // If we aren't scanning for a string template, do the default thing |
| goto LMainDefault; |
| |
| case kchNUL: |
| if (p > last) |
| { |
| m_currentCharacter = p - 1; |
| Error(ERRnoStrEnd); |
| } |
| break; |
| |
| default: |
| LMainDefault: |
| if (this->IsMultiUnitChar(ch)) |
| { |
| if ((ch == kchLS || ch == kchPS)) |
| { |
| goto LEcmaLineBreak; |
| } |
| |
| rawch = ch = this->template ReadRest<true>(ch, p, last); |
| switch (ch) |
| { |
| case kchLS: // 0x2028, classifies as new line |
| case kchPS: // 0x2029, classifies as new line |
| goto LEcmaLineBreak; |
| } |
| } |
| break; |
| |
| case kchBSL: |
| // In raw mode '\\' is not an escape character, just add the char into the raw buffer. |
| m_tempChBufSecondary.template AppendCh<createRawString>(ch); |
| |
| m_EscapeOnLastTkStrCon=TRUE; |
| |
| // In raw mode, we append the raw char itself and not the escaped value so save the char. |
| rawch = ch = this->ReadFirst(p, last); |
| codepoint_t codePoint = 0; |
| uint errorType = (uint)ERRbadHexDigit; |
| switch (ch) |
| { |
| case 'b': |
| ch = 0x08; |
| break; |
| case 't': |
| ch = 0x09; |
| break; |
| case 'v': |
| ch = 0x0B; //Only in ES5 mode |
| break; //same as default |
| case 'n': |
| ch = 0x0A; |
| break; |
| case 'f': |
| ch = 0x0C; |
| break; |
| case 'r': |
| ch = 0x0D; |
| break; |
| case 'x': |
| // Insert the 'x' here before jumping to parse the hex digits. |
| m_tempChBufSecondary.template AppendCh<createRawString>(ch); |
| |
| // 2 hex digits |
| ch = 0; |
| goto LTwoHex; |
| case 'u': |
| // Raw string just inserts a 'u' here. |
| m_tempChBufSecondary.template AppendCh<createRawString>(ch); |
| |
| ch = 0; |
| if (Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT)) |
| goto LFourHex; |
| else if (c != '{' || !this->es6UnicodeMode) |
| goto ReturnScanError; |
| |
| Assert(c == '{'); |
| // c should definitely be a '{' which should be appended to the raw string. |
| m_tempChBufSecondary.template AppendCh<createRawString>(c); |
| |
| //At least one digit is expected |
| if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT)) |
| { |
| goto ReturnScanError; |
| } |
| |
| m_tempChBufSecondary.template AppendCh<createRawString>(c); |
| |
| codePoint = static_cast<codepoint_t>(wT); |
| |
| while(Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT)) |
| { |
| m_tempChBufSecondary.template AppendCh<createRawString>(c); |
| codePoint <<= 4; |
| codePoint += static_cast<codepoint_t>(wT); |
| |
| if (codePoint > 0x10FFFF) |
| { |
| errorType = (uint)ERRInvalidCodePoint; |
| goto ReturnScanError; |
| } |
| } |
| |
| if (c != '}') |
| { |
| errorType = (uint)ERRMissingCurlyBrace; |
| goto ReturnScanError; |
| } |
| |
| Assert(codePoint <= 0x10FFFF); |
| |
| if (codePoint >= 0x10000) |
| { |
| OLECHAR lower = 0; |
| Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &ch); |
| m_tempChBuf.AppendCh(lower); |
| } |
| else |
| { |
| ch = (char16)codePoint; |
| } |
| |
| // In raw mode we want the last hex character or the closing curly. c should hold one or the other. |
| if (createRawString) |
| rawch = c; |
| |
| break; |
| LFourHex: |
| codePoint = 0x0; |
| // Append first hex digit character to the raw string. |
| m_tempChBufSecondary.template AppendCh<createRawString>(c); |
| |
| codePoint += static_cast<codepoint_t>(wT * 0x1000); |
| if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT)) |
| goto ReturnScanError; |
| |
| // Append fourth (or second) hex digit character to the raw string. |
| m_tempChBufSecondary.template AppendCh<createRawString>(c); |
| |
| codePoint += static_cast<codepoint_t>(wT * 0x0100); |
| |
| LTwoHex: |
| // This code path doesn't expect curly. |
| if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT)) |
| goto ReturnScanError; |
| |
| // Append first hex digit character to the raw string. |
| m_tempChBufSecondary.template AppendCh<createRawString>(c); |
| |
| codePoint += static_cast<codepoint_t>(wT * 0x0010); |
| |
| if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT)) |
| goto ReturnScanError; |
| |
| codePoint += static_cast<codepoint_t>(wT); |
| |
| // In raw mode we want the last hex character or the closing curly. c should hold one or the other. |
| if (createRawString) |
| rawch = c; |
| |
| if (codePoint < 0x10000) |
| { |
| ch = static_cast<OLECHAR>(codePoint); |
| } |
| else |
| { |
| goto ReturnScanError; |
| } |
| break; |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| // 1 to 3 octal digits |
| |
| ch -= '0'; |
| |
| // Octal escape sequences are not allowed inside string template literals |
| if (stringTemplateMode) |
| { |
| c = this->PeekFirst(p, last); |
| if (ch != 0 || (c >= '0' && c <= '7')) |
| { |
| errorType = (uint)ERRES5NoOctal; |
| goto ReturnScanError; |
| } |
| break; |
| } |
| |
| wT = (c = this->ReadFirst(p, last)) - '0'; |
| if ((char16)wT > 7) |
| { |
| if (ch != 0 || ((char16)wT <= 9)) |
| { |
| m_OctOrLeadingZeroOnLastTKNumber = true; |
| } |
| p--; |
| break; |
| } |
| |
| m_OctOrLeadingZeroOnLastTKNumber = true; |
| ch = static_cast< OLECHAR >(ch * 8 + wT); |
| goto LOneOctal; |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| // 1 to 2 octal digits |
| |
| // Octal escape sequences are not allowed inside string template literals |
| if (stringTemplateMode) |
| { |
| errorType = (uint)ERRES5NoOctal; |
| goto ReturnScanError; |
| } |
| |
| ch -= '0'; |
| |
| m_OctOrLeadingZeroOnLastTKNumber = true; |
| |
| LOneOctal: |
| wT = (c = this->ReadFirst(p, last)) - '0'; |
| if ((char16)wT > 7) |
| { |
| p--; |
| break; |
| } |
| |
| ch = static_cast< OLECHAR >(ch * 8 + wT); |
| break; |
| |
| case kchRET: // 0xD |
| if (stringTemplateMode) |
| { |
| // If this is \<CR><LF> we can eat the <LF> right now |
| if (this->PeekFirst(p, last) == kchNWL) |
| { |
| // Eat the <LF> char, ignore return |
| this->ReadFirst(p, last); |
| } |
| |
| // Both \<CR> and \<CR><LF> are normalized to \<LF> in template raw string |
| rawch = kchNWL; |
| } |
| case kchLS: // 0x2028, classifies as new line |
| case kchPS: // 0x2029, classifies as new line |
| case kchNWL: // 0xA |
| LEcmaEscapeLineBreak: |
| if (stringTemplateMode) |
| { |
| // We're going to ignore the line continuation tokens for the cooked strings, but we need to append the token for raw strings |
| m_tempChBufSecondary.template AppendCh<createRawString>(rawch); |
| |
| // Template literal strings ignore all escaped line continuation tokens |
| NotifyScannedNewLine(); |
| |
| // We haven't updated m_currentCharacter yet, so make sure the MinLine info is correct in case we error out. |
| m_pchMinLine = p; |
| |
| continue; |
| } |
| |
| m_currentCharacter = p; |
| ScanNewLine(ch); |
| p = m_currentCharacter; |
| continue; |
| |
| case 0: |
| if (p >= last) |
| { |
| errorType = (uint)ERRnoStrEnd; |
| |
| ReturnScanError: |
| m_currentCharacter = p - 1; |
| Error(errorType); |
| } |
| else if (stringTemplateMode) |
| { |
| // Escaped null character is translated into 0x0030 for raw template literals |
| rawch = 0x0030; |
| } |
| break; |
| |
| default: |
| if (this->IsMultiUnitChar(ch)) |
| { |
| rawch = ch = this->template ReadRest<true>(ch, p, last); |
| switch (ch) |
| { |
| case kchLS: |
| case kchPS: |
| goto LEcmaEscapeLineBreak; |
| } |
| } |
| break; |
| } |
| break; |
| } |
| |
| m_tempChBuf.AppendCh(ch); |
| m_tempChBufSecondary.template AppendCh<createRawString>(rawch); |
| } |
| |
| LBreak: |
| bool createPid = true; |
| |
| if ((m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0) |
| { |
| createPid = false; |
| |
| if ((m_tempChBuf.m_ichCur == 10) && (0 == memcmp(_u("use strict"), m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur * sizeof(OLECHAR)))) |
| { |
| createPid = true; |
| } |
| } |
| |
| if (createPid) |
| { |
| m_ptoken->SetIdentifier(this->GetHashTbl()->PidHashNameLen(m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur)); |
| } |
| else |
| { |
| m_ptoken->SetIdentifier(NULL); |
| } |
| |
| m_scanState = ScanStateNormal; |
| m_doubleQuoteOnLastTkStrCon = '"' == delim; |
| *pp = p; |
| |
| return tkStrCon; |
| } |
| |
| // Scans an ordinary quoted string literal by forwarding to the templated |
| // implementation with string-template mode and raw-string capture both off. |
| template<typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp) |
| { |
|     const tokens scanned = this->template ScanStringConstant<false, false>(delim, pp); |
|     return scanned; |
| } |
| |
| /***************************************************************************** |
| * |
| * Consume a C-style comment. |
| * |
| * pp - in/out scan position. On entry it points into the comment |
| * body. When the closing star-slash pair is found it is set |
| * to the character after the pair and tkNone is returned. |
| * containTypeDef - out flag; this function only ever stores false in it. |
| * |
| * Every line terminator inside the comment sets m_fHadEol and is reported |
| * through ScanNewLine. If the end of the source is reached first, |
| * m_currentCharacter and the caller position are both set to the last |
| * character read and Error(ERRnoCmtEnd) is raised. |
| */ |
| template<typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef) |
| { |
|     Assert(containTypeDef != nullptr); |
|     EncodedCharPtr p = *pp; |
|     *containTypeDef = false; |
|     EncodedCharPtr last = m_pchLast; |
|     OLECHAR ch; |
| |
|     for (;;) |
|     { |
|         switch((ch = this->ReadFirst(p, last))) |
|         { |
|         case '*': |
|             // Look ahead through PeekFirst, like every other lookahead in the scanner, so that |
|             // a slash located at or beyond m_pchLast can never terminate the comment. |
|             if (this->PeekFirst(p, last) == '/') |
|             { |
|                 *pp = p + 1; |
|                 return tkNone; |
|             } |
|             break; |
| |
|         // ES 2015 11.3 Line Terminators |
|         case kchLS: // 0x2028, classifies as new line |
|         case kchPS: // 0x2029, classifies as new line |
|         case kchRET: |
|         case kchNWL: |
|         LLineBreak: |
|             m_fHadEol = TRUE; |
|             // ScanNewLine works on m_currentCharacter, so publish the local position and read it back. |
|             m_currentCharacter = p; |
|             ScanNewLine(ch); |
|             p = m_currentCharacter; |
|             break; |
| |
|         case kchNUL: |
|             // A NUL before the end of the source is just part of the comment text. |
|             if (p >= last) |
|             { |
|                 m_currentCharacter = p - 1; |
|                 *pp = p - 1; |
|                 Error(ERRnoCmtEnd); |
|             } |
|             break; |
| |
|         default: |
|             if (this->IsMultiUnitChar(ch)) |
|             { |
|                 // Decode the rest of the multi-unit character; it may turn out to be LS or PS. |
|                 ch = this->template ReadRest<true>(ch, p, last); |
|                 switch (ch) |
|                 { |
|                 case kchLS: |
|                 case kchPS: |
|                     goto LLineBreak; |
|                 } |
|             } |
|             break; |
|         } |
|     } |
| } |
| |
| /***************************************************************************** |
| * |
| * A line terminator has just been read. Swallows the line feed of a |
| * carriage-return / line-feed pair, then updates the line bookkeeping. |
| */ |
| template<typename EncodingPolicy> |
| void Scanner<EncodingPolicy>::ScanNewLine(uint ch) |
| { |
|     // The peek only happens when the terminator was a carriage return. |
|     const bool isCrLfPair = (ch == '\r') && (PeekNextChar() == '\n'); |
|     if (isCrLfPair) |
|     { |
|         ReadNextChar(); |
|     } |
| |
|     NotifyScannedNewLine(); |
| } |
| |
| /***************************************************************************** |
| * |
| * Line bookkeeping for a newline that has already been consumed: bumps the |
| * line counter and moves the previous-line / current-line start markers. |
| */ |
| template<typename EncodingPolicy> |
| void Scanner<EncodingPolicy>::NotifyScannedNewLine() |
| { |
|     // The old line start becomes the previous line before it is overwritten. |
|     m_pchPrevLine = m_pchMinLine; |
|     m_pchMinLine = m_currentCharacter; |
| |
|     // Remember the multi-unit count that was current at the start of the new line. |
|     m_cMinLineMultiUnits = this->m_cMultiUnits; |
| |
|     ++m_line; |
| } |
| |
| /***************************************************************************** |
| * |
| * Delivers a token stream. |
| */ |
| |
| |
| // Scans one token with the deferred-parse flags temporarily cleared. The flags |
| // are put back in the finally handler, whatever the outcome of the scan. |
| template<typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::ScanForcingPid() |
| { |
|     // No flags are set, so there is nothing to suspend. |
|     if (m_DeferredParseFlags == ScanFlagNone) |
|     { |
|         return Scan(); |
|     } |
| |
|     BYTE savedFlags = m_DeferredParseFlags; |
|     m_DeferredParseFlags = ScanFlagNone; |
| |
|     tokens scanned = tkEOF; |
|     TryFinally( |
|         [&]() /* try block */ |
|         { |
|             scanned = this->Scan(); |
|         }, |
|         [&](bool) /* finally block */ |
|         { |
|             this->m_DeferredParseFlags = savedFlags; |
|         }); |
| |
|     return scanned; |
| } |
| |
| // Scans the next token, asking ScanCore to identify keywords. |
| template<typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::Scan() |
| { |
|     const bool identifyKeywords = true; |
|     return ScanCore(identifyKeywords); |
| } |
| |
| // Scans the next token, asking ScanCore not to identify keywords. |
| template<typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::ScanNoKeywords() |
| { |
|     const bool identifyKeywords = false; |
|     return ScanCore(identifyKeywords); |
| } |
| |
| // Look-ahead scan; currently the same as a scan without keyword identification. |
| template<typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::ScanAhead() |
| { |
|     const tokens scanned = this->ScanNoKeywords(); |
|     return scanned; |
| } |
| |
| // Scans the next token starting at m_currentCharacter. |
| // |
| // identifyKwds is forwarded to ScanIdentifierContinue / ScanIdentifier. |
| // NOTE(review): the included kwd-swtch.h presumably consults it as well - confirm. |
| // |
| // Before scanning, the previous token kind and its limits are saved in m_tkPrevious, |
| // m_iecpLimTokPrevious and m_ichLimTokPrevious. If the position is already at or past |
| // m_pchLast the result is tkEOF. If m_scanState is ScanStateStringTemplateMiddleOrEnd the |
| // scan resumes inside a template literal through ScanStringTemplateMiddleOrEnd. |
| // |
| // Otherwise whitespace, line terminators (which set m_fHadEol) and comments are skipped |
| // until a token is recognized. On exit m_currentCharacter points after the token and the |
| // token kind is stored in m_ptoken->tk and returned. Illegal input is reported via Error. |
| template<typename EncodingPolicy> |
| tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds) |
| { |
| codepoint_t ch; |
| OLECHAR firstChar; |
| OLECHAR secondChar; |
| EncodedCharPtr pchT; |
| size_t multiUnits = 0; |
| EncodedCharPtr p = m_currentCharacter; |
| EncodedCharPtr last = m_pchLast; |
| bool seenDelimitedCommentEnd = false; |
| |
| // store the last token |
| m_tkPrevious = m_ptoken->tk; |
| m_iecpLimTokPrevious = IecpLimTok(); // Introduced for use by lambda parsing to find correct span of expression lambdas |
| m_ichLimTokPrevious = IchLimTok(); |
| size_t savedMultiUnits = this->m_cMultiUnits; |
| |
| if (p >= last) |
| { |
| m_pchMinTok = p; |
| m_cMinTokMultiUnits = this->m_cMultiUnits; |
| goto LEof; |
| } |
| tokens token; |
| m_fHadEol = FALSE; |
| CharTypes chType; |
| charcount_t commentStartLine; |
| |
| if (m_scanState && *p != 0) |
| { |
| if (m_scanState == ScanStateStringTemplateMiddleOrEnd) |
| { |
| AssertMsg(m_fStringTemplateDepth > 0, |
| "Shouldn't be trying to parse a string template end or middle token if we aren't scanning a string template"); |
| |
| m_scanState = ScanStateNormal; |
| |
| pchT = p; |
| token = ScanStringTemplateMiddleOrEnd(&pchT); |
| p = pchT; |
| |
| goto LDone; |
| } |
| } |
| |
| for (;;) |
| { |
| LLoop: |
| // Each pass through the loop starts a new candidate token at p. |
| m_pchMinTok = p; |
| m_cMinTokMultiUnits = this->m_cMultiUnits; |
| ch = this->ReadFirst(p, last); |
| #if DEBUG |
| chType = this->charClassifier->GetCharType((OLECHAR)ch); |
| #endif |
| switch (ch) |
| { |
| default: |
| if (ch == kchLS || |
| ch == kchPS ) |
| { |
| goto LNewLine; |
| } |
| { |
| BOOL isMultiUnit = this->IsMultiUnitChar((OLECHAR)ch); |
| if (isMultiUnit) |
| { |
| ch = this->template ReadRest<true>((OLECHAR)ch, p, last); |
| } |
| |
| if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch)) |
| { |
| codepoint_t upper = this->PeekFull(p, last); |
| |
| if (Js::NumberUtilities::IsSurrogateUpperPart(upper)) |
| { |
| // Consume the rest of the utf8 bytes for the codepoint |
| OLECHAR decodedUpper = this->ReadSurrogatePairUpper(p, last); |
| Assert(decodedUpper == (OLECHAR) upper); |
| ch = Js::NumberUtilities::SurrogatePairAsCodePoint(ch, upper); |
| } |
| } |
| |
| if (this->charClassifier->IsIdStart(ch)) |
| { |
| // We treat IDContinue as an error. |
| token = ScanIdentifierContinue(identifyKwds, false, !!isMultiUnit, m_pchMinTok, p, &p); |
| break; |
| } |
| } |
| |
| chType = this->charClassifier->GetCharType(ch); |
| switch (chType) |
| { |
| case _C_WSP: continue; |
| case _C_NWL: goto LNewLine; |
| // All other types (except errors) are handled by the outer switch. |
| } |
| Assert(chType == _C_LET || chType == _C_ERR || chType == _C_UNK || chType == _C_BKQ || chType == _C_SHP || chType == _C_AT || chType == _C_DIG); |
| m_currentCharacter = p - 1; |
| Error(ERRillegalChar); |
| continue; |
| |
| case '\0': |
| // Put back the null in case we get called again. |
| p--; |
| if (p < last) |
| { |
| // A \0 prior to the end of the text is an invalid character. |
| m_currentCharacter = p; |
| Error(ERRillegalChar); |
| } |
| LEof: |
| Assert(p >= last); |
| token = tkEOF; |
| break; |
| |
| case 0x0009: |
| case 0x000B: |
| case 0x000C: |
| case 0x0020: |
| Assert(chType == _C_WSP); |
| continue; |
| |
| case '.': |
| // Use the bounds-aware PeekFirst rather than a raw *p so that a digit located at or |
| // beyond m_pchLast is never treated as part of this token. |
| if (!Js::NumberUtilities::IsDigit(this->PeekFirst(p, last))) |
| { |
| // Not a double |
| if (m_scriptContext->GetConfig()->IsES6SpreadEnabled() && |
| this->PeekFirst(p, last) == '.' && |
| this->PeekFirst(p + 1, last) == '.') |
| { |
| token = tkEllipsis; |
| p += 2; |
| } |
| else |
| { |
| token = tkDot; |
| } |
| break; |
| } |
| // May be a double, fall through |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| { |
| double dbl; |
| Assert(chType == _C_DIG || chType == _C_DOT); |
| // Rewind to the start of the token so FScanNumber sees the whole literal. |
| p = m_pchMinTok; |
| this->RestoreMultiUnits(m_cMinTokMultiUnits); |
| LikelyNumberType likelyType = LikelyNumberType::Int; |
| pchT = FScanNumber(p, &dbl, likelyType, savedMultiUnits); |
| if (p == pchT) |
| { |
| this->RestoreMultiUnits(savedMultiUnits); |
| Assert(this->PeekFirst(p, last) != '.'); |
| Error(ERRbadNumber); |
| } |
| Assert(!Js::NumberUtilities::IsNan(dbl)); |
| if (likelyType == LikelyNumberType::BigInt) |
| { |
| Assert(m_scriptContext->GetConfig()->IsESBigIntEnabled()); |
| AssertOrFailFast(pchT - p < UINT_MAX); |
| token = tkBigIntCon; |
| m_ptoken->SetBigInt(this->GetHashTbl()->PidHashNameLen(p, pchT, (uint32) (pchT - p))); |
| p = pchT; |
| break; |
| } |
| p = pchT; |
| |
| int32 value; |
| if ((likelyType == LikelyNumberType::Int) && Js::NumberUtilities::FDblIsInt32(dbl, &value)) |
| { |
| m_ptoken->SetLong(value); |
| token = tkIntCon; |
| } |
| else |
| { |
| token = tkFltCon; |
| m_ptoken->SetDouble(dbl, likelyType == LikelyNumberType::Int); |
| } |
| |
| break; |
| } |
| case '(': Assert(chType == _C_LPR); token = tkLParen; break; |
| case ')': Assert(chType == _C_RPR); token = tkRParen; break; |
| case ',': Assert(chType == _C_CMA); token = tkComma; break; |
| case ';': Assert(chType == _C_SMC); token = tkSColon; break; |
| case '[': Assert(chType == _C_LBR); token = tkLBrack; break; |
| case ']': Assert(chType == _C_RBR); token = tkRBrack; break; |
| case '~': Assert(chType == _C_TIL); token = tkTilde; break; |
| case '?': Assert(chType == _C_QUE); token = tkQMark; break; |
| case '{': Assert(chType == _C_LC); token = tkLCurly; break; |
| |
| // ES 2015 11.3 Line Terminators |
| case '\r': |
| case '\n': |
| // kchLS: |
| // kchPS: |
| LNewLine: |
| m_currentCharacter = p; |
| ScanNewLine(ch); |
| p = m_currentCharacter; |
| m_fHadEol = TRUE; |
| continue; |
| |
| LReserved: |
| { |
| // We will derive the PID from the token |
| Assert(token < tkID); |
| m_ptoken->SetIdentifier(NULL); |
| goto LDone; |
| } |
| |
| LEval: |
| { |
| token = tkID; |
| if (!this->m_parser) goto LIdentifier; |
| m_ptoken->SetIdentifier(this->m_parser->GetEvalPid()); |
| goto LDone; |
| } |
| |
| LArguments: |
| { |
| token = tkID; |
| if (!this->m_parser) goto LIdentifier; |
| m_ptoken->SetIdentifier(this->m_parser->GetArgumentsPid()); |
| goto LDone; |
| } |
| |
| LTarget: |
| { |
| token = tkID; |
| if (!this->m_parser) goto LIdentifier; |
| m_ptoken->SetIdentifier(this->m_parser->GetTargetPid()); |
| goto LDone; |
| } |
| |
| #include "kwd-swtch.h" |
| case 'A': case 'B': case 'C': case 'D': case 'E': |
| case 'F': case 'G': case 'H': case 'I': case 'J': |
| case 'K': case 'L': case 'M': case 'N': case 'O': |
| case 'P': case 'Q': case 'R': case 'S': case 'T': |
| case 'U': case 'V': case 'W': case 'X': case 'Y': |
| case 'Z': |
| // Lower-case letters handled in kwd-swtch.h above during reserved word recognition. |
| case '$': case '_': |
| LIdentifier: |
| Assert(this->charClassifier->IsIdStart(ch)); |
| Assert(ch < 0x10000 && !this->IsMultiUnitChar((OLECHAR)ch)); |
| token = ScanIdentifierContinue(identifyKwds, false, false, m_pchMinTok, p, &p); |
| break; |
| |
| case '`': |
| Assert(chType == _C_BKQ); |
| |
| pchT = p; |
| token = ScanStringTemplateBegin(&pchT); |
| p = pchT; |
| break; |
| |
| case '}': |
| Assert(chType == _C_RC); |
| token = tkRCurly; |
| break; |
| |
| case '\\': |
| // Hand the backslash itself to ScanIdentifier by rewinding one unit. |
| pchT = p - 1; |
| token = ScanIdentifier(identifyKwds, &pchT); |
| if (tkScanError == token) |
| { |
| m_currentCharacter = p; |
| Error(ERRillegalChar); |
| } |
| p = pchT; |
| break; |
| |
| |
| case ':': |
| token = tkColon; |
| break; |
| |
| case '=': |
| token = tkAsg; |
| switch (this->PeekFirst(p, last)) |
| { |
| case '=': |
| p++; |
| token = tkEQ; |
| if (this->PeekFirst(p, last) == '=') |
| { |
| p++; |
| token = tkEqv; |
| } |
| break; |
| case '>': |
| p++; |
| token = tkDArrow; |
| break; |
| } |
| break; |
| case '!': |
| token = tkBang; |
| if (this->PeekFirst(p, last) == '=') |
| { |
| p++; |
| token = tkNE; |
| if (this->PeekFirst(p, last) == '=') |
| { |
| p++; |
| token = tkNEqv; |
| } |
| } |
| break; |
| case '+': |
| token = tkAdd; |
| switch (this->PeekFirst(p, last)) |
| { |
| case '=': |
| p++; |
| token = tkAsgAdd; |
| break; |
| case '+': |
| p++; |
| token = tkInc; |
| break; |
| } |
| break; |
| case '-': |
| token = tkSub; |
| switch (this->PeekFirst(p, last)) |
| { |
| case '=': |
| p++; |
| token = tkAsgSub; |
| break; |
| case '-': |
| p++; |
| token = tkDec; |
| if (!m_fIsModuleCode) |
| { |
| // https://tc39.github.io/ecma262/#prod-annexB-MultiLineComment |
| // If there was a new line in the multi-line comment, the text after --> is a comment. |
| if ('>' == this->PeekFirst(p, last) && m_fHadEol) |
| { |
| goto LSkipLineComment; |
| } |
| } |
| break; |
| } |
| break; |
| case '*': |
| token = tkStar; |
| switch(this->PeekFirst(p, last)) |
| { |
| case '=' : |
| p++; |
| token = tkAsgMul; |
| break; |
| case '*' : |
| if (!m_scriptContext->GetConfig()->IsES7ExponentiationOperatorEnabled()) |
| { |
| break; |
| } |
| p++; |
| token = tkExpo; |
| if (this->PeekFirst(p, last) == '=') |
| { |
| p++; |
| token = tkAsgExpo; |
| } |
| } |
| break; |
| case '/': |
| token = tkDiv; |
| switch(this->PeekFirst(p, last)) |
| { |
| case '=': |
| p++; |
| token = tkAsgDiv; |
| break; |
| case '/': |
| if (p >= last) |
| { |
| AssertMsg(!m_fIsModuleCode, "Do we have other line comment cases scanning pass last?"); |
| |
| // Effective source length may have excluded HTMLCommentSuffix "//... -->". If we are scanning |
| // those, we have passed "last" already. Move back and return EOF. |
| p = last; |
| goto LEof; |
| } |
| ch = *++p; |
| firstChar = (OLECHAR)ch; |
| LSkipLineComment: |
| // pchT remembers where a multi-unit character started so that an LS/PS terminator can be un-read. |
| pchT = NULL; |
| for (;;) |
| { |
| switch ((ch = this->ReadFirst(p, last))) |
| { |
| case kchLS: // 0x2028, classifies as new line |
| case kchPS: // 0x2029, classifies as new line |
| LEcmaCommentLineBreak: |
| // kchPS and kchLS are more than one unit in UTF-8. |
| if (pchT) |
| { |
| // kchPS and kchLS are more than one unit in UTF-8. |
| p = pchT; |
| } |
| else |
| { |
| // But only a single code unit in UTF16 |
| p--; |
| } |
| this->RestoreMultiUnits(multiUnits); |
| goto LCommentLineBreak; |
| |
| case kchNWL: |
| case kchRET: |
| p--; |
| LCommentLineBreak: |
| // Subtract the comment length from the total char count for the purpose |
| // of deciding whether to defer AST and byte code generation. |
| m_parser->ReduceDeferredScriptLength((ULONG)(p - m_pchMinTok)); |
| break; |
| case kchNUL: |
| // Because we used ReadFirst, we have advanced p. The character that we are looking at is actually is p - 1. |
| // If p == last, we are looking at p - 1, it is still within the source buffer, and we need to consider it part of the comment |
| // Only if p > last that we have pass the source buffer and consider it a line break |
| if (p > last) |
| { |
| p--; |
| goto LCommentLineBreak; |
| } |
| continue; |
| |
| default: |
| if (this->IsMultiUnitChar((OLECHAR)ch)) |
| { |
| pchT = p - 1; |
| multiUnits = this->m_cMultiUnits; |
| switch (ch = this->template ReadRest<true>((OLECHAR)ch, p, last)) |
| { |
| case kchLS: |
| case kchPS: |
| goto LEcmaCommentLineBreak; |
| } |
| } |
| continue; |
| } |
| |
| break; |
| } |
| |
| continue; |
| |
| case '*': |
| ch = *++p; |
| firstChar = (OLECHAR)ch; |
| if ((p + 1) < last) |
| { |
| secondChar = (OLECHAR)(*(p + 1)); |
| } |
| else |
| { |
| secondChar = '\0'; |
| } |
| |
| pchT = p; |
| commentStartLine = m_line; |
| bool containTypeDef; |
| if (tkNone == (token = SkipComment(&pchT, &containTypeDef))) |
| { |
| // Subtract the comment length from the total char count for the purpose |
| // of deciding whether to defer AST and byte code generation. |
| m_parser->ReduceDeferredScriptLength((ULONG)(pchT - m_pchMinTok)); |
| p = pchT; |
| seenDelimitedCommentEnd = true; |
| goto LLoop; |
| } |
| p = pchT; |
| break; |
| } |
| break; |
| case '%': |
| Assert(chType == _C_PCT); |
| token = tkPct; |
| if (this->PeekFirst(p, last) == '=') |
| { |
| p++; |
| token = tkAsgMod; |
| } |
| break; |
| case '<': |
| Assert(chType == _C_LT); |
| token = tkLT; |
| switch (this->PeekFirst(p, last)) |
| { |
| case '=': |
| p++; |
| token = tkLE; |
| break; |
| case '<': |
| p++; |
| token = tkLsh; |
| if (this->PeekFirst(p, last) == '=') |
| { |
| p++; |
| token = tkAsgLsh; |
| break; |
| } |
| break; |
| case '!': |
| // ES 2015 B.1.3 - HTML comments are only allowed when parsing non-module code. |
| if (!m_fIsModuleCode && this->PeekFirst(p + 1, last) == '-' && this->PeekFirst(p + 2, last) == '-') |
| { |
| // This is a "<!--" comment - treat as // |
| if (p >= last) |
| { |
| // Effective source length may have excluded HTMLCommentSuffix "<!-- ... -->". If we are scanning |
| // those, we have passed "last" already. Move back and return EOF. |
| p = last; |
| goto LEof; |
| } |
| firstChar = '!'; |
| goto LSkipLineComment; |
| } |
| break; |
| } |
| break; |
| case '>': |
| Assert(chType == _C_GT); |
| token = tkGT; |
| switch (this->PeekFirst(p, last)) |
| { |
| case '=': |
| p++; |
| token = tkGE; |
| break; |
| case '>': |
| p++; |
| token = tkRsh; |
| switch (this->PeekFirst(p, last)) |
| { |
| case '=': |
| p++; |
| token = tkAsgRsh; |
| break; |
| case '>': |
| p++; |
| token = tkRs2; |
| // Same bounds-aware lookahead as the sibling operators; a raw *p could read at or past m_pchLast. |
| if (this->PeekFirst(p, last) == '=') |
| { |
| p++; |
| token = tkAsgRs2; |
| } |
| break; |
| } |
| break; |
| } |
| break; |
| case '^': |
| Assert(chType == _C_XOR); |
| token = tkXor; |
| if (this->PeekFirst(p, last) == '=') |
| { |
| p++; |
| token = tkAsgXor; |
| } |
| break; |
| case '|': |
| Assert(chType == _C_BAR); |
| token = tkOr; |
| switch (this->PeekFirst(p, last)) |
| { |
| case '=': |
| p++; |
| token = tkAsgOr; |
| break; |
| case '|': |
| p++; |
| token = tkLogOr; |
| break; |
| } |
| break; |
| case '&': |
| Assert(chType == _C_AMP); |
| token = tkAnd; |
| switch (this->PeekFirst(p, last)) |
| { |
| case '=': |
| p++; |
| token = tkAsgAnd; |
| break; |
| case '&': |
| p++; |
| token = tkLogAnd; |
| break; |
| } |
| break; |
| case '\'': |
| case '"': |
| Assert(chType == _C_QUO || chType == _C_APO); |
| pchT = p; |
| token = this->ScanStringConstant((OLECHAR)ch, &pchT); |
| p = pchT; |
| break; |
| } |
| |
| break; |
| } |
| |
| LDone: |
| // Publish the position after the token and record the token kind. |
| m_currentCharacter = p; |
| return (m_ptoken->tk = token); |
| } |
| |
| // Returns a PID for the contents of the secondary temp buffer, or nullptr when |
| // ScanFlagSuppressStrPid is set in the deferred-parse flags. |
| template <typename EncodingPolicy> |
| IdentPtr Scanner<EncodingPolicy>::GetSecondaryBufferAsPid() |
| { |
|     const bool suppressPid = (m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0; |
|     if (suppressPid) |
|     { |
|         return nullptr; |
|     } |
| |
|     return this->GetHashTbl()->PidHashNameLen(m_tempChBufSecondary.m_prgch, m_tempChBufSecondary.m_ichCur); |
| } |
| |
| // Formats lw in base 10 into the scanner's temp buffer and returns that buffer. |
| // The buffer is shared, so the text is only valid until the buffer is reused. |
| template <typename EncodingPolicy> |
| LPCOLESTR Scanner<EncodingPolicy>::StringFromLong(int32 lw) |
| { |
|     const int radix = 10; |
|     _ltow_s(lw, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax, radix); |
| |
|     return m_tempChBuf.m_prgch; |
| } |
| |
| // Returns the PID for the decimal text of lw. |
| template <typename EncodingPolicy> |
| IdentPtr Scanner<EncodingPolicy>::PidFromLong(int32 lw) |
| { |
|     LPCOLESTR decimalText = StringFromLong(lw); |
|     return this->GetHashTbl()->PidHashName(decimalText); |
| } |
| |
| // Formats dbl into the scanner's temp buffer and returns that buffer. |
| // Raises ERRnoMemory when FDblToStr reports failure. |
| template <typename EncodingPolicy> |
| LPCOLESTR Scanner<EncodingPolicy>::StringFromDbl(double dbl) |
| { |
|     const bool formatted = !!Js::NumberUtilities::FDblToStr(dbl, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax); |
|     if (!formatted) |
|     { |
|         Error(ERRnoMemory); |
|     } |
| |
|     return m_tempChBuf.m_prgch; |
| } |
| |
| // Returns the PID for the text form of dbl. |
| template <typename EncodingPolicy> |
| IdentPtr Scanner<EncodingPolicy>::PidFromDbl(double dbl) |
| { |
|     LPCOLESTR numberText = StringFromDbl(dbl); |
|     return this->GetHashTbl()->PidHashName(numberText); |
| } |
| |
| |
| // Captures the current scanner state with no function-id increment and no length decrement. |
| template <typename EncodingPolicy> |
| void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint) |
| { |
|     const uint noFunctionIdIncrement = 0; |
|     const size_t noLengthDecrement = 0; |
|     this->Capture(restorePoint, noFunctionIdIncrement, noLengthDecrement); |
| } |
| |
| // Records the current token start, line start, their multi-unit counts, the line number |
| // and the end-of-line flag in restorePoint, together with the two caller-supplied |
| // adjustments that the SeekTo functions in this file apply when seeking back. |
| template <typename EncodingPolicy> |
| void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr) |
| { |
|     RestorePoint& snapshot = *restorePoint; |
| |
|     // Token and line start offsets. |
|     snapshot.m_ichMinTok = this->IchMinTok(); |
|     snapshot.m_ichMinLine = this->IchMinLine(); |
| |
|     // Multi-unit counts that go with those offsets. |
|     snapshot.m_cMinTokMultiUnits = this->m_cMinTokMultiUnits; |
|     snapshot.m_cMinLineMultiUnits = this->m_cMinLineMultiUnits; |
| |
|     // Line state. |
|     snapshot.m_line = this->m_line; |
|     snapshot.m_fHadEol = this->m_fHadEol; |
| |
|     // Caller-supplied adjustments. |
|     snapshot.functionIdIncrement = functionIdIncrement; |
|     snapshot.lengthDecr = lengthDecr; |
| |
| #ifdef DEBUG |
|     snapshot.m_cMultiUnits = this->m_cMultiUnits; |
| #endif |
| } |
| |
| // Seeks back to restorePoint and rescans the token there with a normal scan. |
| template <typename EncodingPolicy> |
| void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint) |
| { |
|     this->template SeekAndScan<false>(restorePoint); |
| } |
| |
| // Seeks back to restorePoint and rescans the token there through ScanForcingPid. |
| template <typename EncodingPolicy> |
| void Scanner<EncodingPolicy>::SeekToForcingPid(const RestorePoint& restorePoint) |
| { |
|     this->template SeekAndScan<true>(restorePoint); |
| } |
| |
| // Repositions the scanner at the token recorded in restorePoint, rescans that token |
| // (through ScanForcingPid when forcePid is true, Scan otherwise), then restores the |
| // recorded line number and end-of-line flag and applies the recorded length decrement. |
| template <typename EncodingPolicy> |
| template <bool forcePid> |
| void Scanner<EncodingPolicy>::SeekAndScan(const RestorePoint& restorePoint) |
| { |
|     // Positions are rebuilt as base + character offset + multi-unit count. |
|     EncodedCharPtr tokenStart = this->m_pchBase + restorePoint.m_ichMinTok + restorePoint.m_cMinTokMultiUnits; |
|     EncodedCharPtr lineStart = this->m_pchBase + restorePoint.m_ichMinLine + restorePoint.m_cMinLineMultiUnits; |
| |
|     this->m_currentCharacter = tokenStart; |
|     this->m_pchMinLine = lineStart; |
|     this->m_cMinLineMultiUnits = restorePoint.m_cMinLineMultiUnits; |
|     this->RestoreMultiUnits(restorePoint.m_cMinTokMultiUnits); |
| |
|     // Rescan the token at the restored position; the returned token kind is not needed here. |
|     forcePid ? this->ScanForcingPid() : this->Scan(); |
| |
|     // The line state is restored after the rescan, so it overrides whatever the rescan set. |
|     this->m_line = restorePoint.m_line; |
|     this->m_fHadEol = restorePoint.m_fHadEol; |
| |
|     this->m_parser->ReduceDeferredScriptLength(restorePoint.lengthDecr); |
| |
|     Assert(this->m_cMultiUnits == restorePoint.m_cMultiUnits); |
| } |
| |
| // Seeks back to restorePoint, then advances *nextFunctionId by the increment stored in it. |
| template <typename EncodingPolicy> |
| void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId) |
| { |
|     this->SeekTo(restorePoint); |
|     *nextFunctionId = *nextFunctionId + restorePoint.functionIdIncrement; |
| } |
| |
| // Called by CompileScriptException::ProcessError to retrieve a BSTR for the line on which an error occurred. |
| // |
| // ichMinLine - character offset of the start of the wanted line. |
| // pbstrLine  - receives a newly allocated BSTR holding the text of that line. |
| // |
| // Returns E_POINTER when pbstrLine is null, E_UNEXPECTED when ichMinLine is negative or |
| // beyond AdjustedLength(), E_OUTOFMEMORY when the BSTR cannot be allocated, S_OK otherwise. |
| template<typename EncodingPolicy> |
| HRESULT Scanner<EncodingPolicy>::SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine) |
| { |
|     if( !pbstrLine ) |
|     { |
|         return E_POINTER; |
|     } |
| |
|     // If we overflow the string, we have a serious problem... |
|     if (ichMinLine < 0 || static_cast<size_t>(ichMinLine) > AdjustedLength() ) |
|     { |
|         return E_UNEXPECTED; |
|     } |
| |
|     // The current line start is already known as a pointer; any other line has to be located |
|     // by converting its character offset into a unit offset from the start of the buffer. |
|     typename EncodingPolicy::EncodedCharPtr pStart = static_cast<size_t>(ichMinLine) == IchMinLine() ? m_pchMinLine : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, ichMinLine); |
| |
|     // Determine the length by scanning for the next newline |
|     size_t cb = 0; |
|     charcount_t cch = LineLength(pStart, m_pchLast, &cb); |
|     Assert(cch <= LONG_MAX); |
| |
|     // cb is the length of the line in encoded units measured from pStart, so the line ends at |
|     // pStart + cb wherever it starts. The previous code converted cch as if it were an offset |
|     // from the start of the buffer, which only gives the right end for a line at offset 0. |
|     typename EncodingPolicy::EncodedCharPtr pEnd = pStart + cb; |
| |
|     *pbstrLine = SysAllocStringLen(NULL, cch); |
|     if (!*pbstrLine) |
|     { |
|         return E_OUTOFMEMORY; |
|     } |
| |
|     this->ConvertToUnicode(*pbstrLine, cch, pStart, pEnd); |
|     return S_OK; |
| } |
| |
| template class Scanner<NotNullTerminatedUTF8EncodingPolicy>; | // Explicit instantiation: the member definitions in this file are compiled for this encoding policy.