lib/Parser/Scan.cpp - external/github.com/Microsoft/ChakraCore - Git at Google

 //-------------------------------------------------------------------------------------------------------
 // Copyright (C) Microsoft. All rights reserved.
 // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
 //-------------------------------------------------------------------------------------------------------
 #include "ParserPch.h"

 /*****************************************************************************
 *
 *  The following table speeds various tests of characters, such as whether
 *  a given character can be part of an identifier, and so on.
 */

 // Counts the line breaks in a NUL-terminated string. A lone CR, a lone LF,
 // and a CR immediately followed by an LF each count as exactly one break.
 int CountNewlines(LPCOLESTR psz)
 {
     int lineBreaks = 0;

     while (*psz != 0)
     {
         const auto ch = *psz++;

         if (ch == _u('\xD'))
         {
             // Swallow the LF of a CR LF pair so the pair is counted once.
             if (*psz == _u('\xA'))
             {
                 ++psz;
             }
             ++lineBreaks;
         }
         else if (ch == _u('\xA'))
         {
             ++lineBreaks;
         }
     }

     return lineBreaks;
 }

 // True when this token's kind compares less than or equal to tkYIELD.
 // Per the original note, that covers keywords but not future reserved words.
 BOOL Token::IsKeyword() const
 {
     const bool atOrBeforeYield = !(tk > tkYIELD);
     return atOrBeforeYield;
 }

 // Turns this token into a regular-expression token.
 //   pattern - stored in the token; when non-null it is also registered with the parser
 //   parser  - must be non-null (asserted)
 // Returns tkRegExp, which is also stored as the token kind.
 tokens Token::SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser)
 {
     Assert(parser);

     if (pattern != nullptr)
     {
         parser->RegisterRegexPattern(pattern);
     }

     this->u.pattern = pattern;
     tk = tkRegExp;
     return tk;
 }

 // Creates the identifier for this token, caches it in u.pid and returns it.
 // The token must not already have a pid (asserted). When raw text was recorded
 // (u.pchMin / u.length) the name is hashed from that text; otherwise the token
 // is expected to be a reserved word and the pid is looked up from its kind.
 IdentPtr Token::CreateIdentifier(HashTbl * hashTbl)
 {
     Assert(this->u.pid == nullptr);

     IdentPtr created;
     if (this->u.pchMin != nullptr)
     {
         Assert(IsIdentifier());
         created = hashTbl->PidHashNameLen(this->u.pchMin, this->u.pchMin + this->u.length, this->u.length);
     }
     else
     {
         Assert(IsReservedWord());
         created = hashTbl->PidFromTk(tk);
     }

     this->u.pid = created;
     return created;
 }

 // Builds a scanner bound to the given parser, output token and script context.
 // ptoken must be non-null (asserted). All scanning state is reset through
 // ClearStates() once the collaborators have been recorded.
 template <typename EncodingPolicy>
 Scanner<EncodingPolicy>::Scanner(Parser* parser, Token *ptoken, Js::ScriptContext* scriptContext)
 {
     Assert(ptoken);

     // Collaborators supplied by the caller.
     this->m_parser = parser;
     this->m_ptoken = ptoken;
     this->m_scriptContext = scriptContext;

     // Both temporary character buffers keep a back-pointer to this scanner.
     this->m_tempChBuf.m_pscanner = this;
     this->m_tempChBufSecondary.m_pscanner = this;

     // Settings read once from the script context.
     this->charClassifier = scriptContext->GetCharClassifier();
     this->es6UnicodeMode = scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled();

     this->ClearStates();
 }

 // Destructor. The body is empty; no explicit cleanup is performed here.
 template <typename EncodingPolicy>
 Scanner<EncodingPolicy>::~Scanner(void)
 {
 }

 // Resets every piece of per-source scanning state to its initial value:
 // buffer pointers become null, counters and flags are cleared, and the
 // "previous token limit" markers are set to their all-ones sentinel.
 template <typename EncodingPolicy>
 void Scanner<EncodingPolicy>::ClearStates()
 {
     // Source buffer pointers.
     this->m_pchBase = nullptr;
     this->m_pchLast = nullptr;
     this->m_currentCharacter = nullptr;
     this->m_pchMinTok = nullptr;
     this->m_pchMinLine = nullptr;
     this->m_pchPrevLine = nullptr;
     this->m_pchStartLine = nullptr;

     // Multi-unit character bookkeeping.
     this->m_cMinTokMultiUnits = 0;
     this->m_cMinLineMultiUnits = 0;

     // Line tracking and scan mode.
     this->m_line = 0;
     this->m_startLine = 0;
     this->m_scanState = ScanStateNormal;
     this->m_fStringTemplateDepth = 0;

     // Flags describing the source and the most recently scanned token.
     this->m_fHadEol = FALSE;
     this->m_fIsModuleCode = FALSE;
     this->m_doubleQuoteOnLastTkStrCon = FALSE;
     this->m_OctOrLeadingZeroOnLastTKNumber = false;
     this->m_EscapeOnLastTkStrCon = false;
     this->m_fNextStringTemplateIsTagged = false;
     this->m_fYieldIsKeywordRegion = false;
     this->m_fAwaitIsKeywordRegion = false;
     this->m_DeferredParseFlags = ScanFlagNone;

     // Error range.
     this->m_ichMinError = 0;
     this->m_ichLimError = 0;

     // No previous token yet: both limits hold the all-ones sentinel.
     this->m_iecpLimTokPrevious = (size_t)-1;
     this->m_ichLimTokPrevious = (charcount_t)-1;
 }

 // Full reset: clears the encoding policy's own state, then the scanner state,
 // then both temporary character buffers.
 template <typename EncodingPolicy>
 void Scanner<EncodingPolicy>::Clear()
 {
     EncodingPolicy::Clear();
     this->ClearStates();

     m_tempChBuf.Clear();
     m_tempChBufSecondary.Clear();
 }

 /*****************************************************************************
 *
 *  Initializes the scanner to prepare to scan the given source text.
 *
 *  pszSrc      - start of the encoded source buffer
 *  offset      - number of encoded units from pszSrc at which scanning starts
 *  length      - number of encoded units to scan, starting at offset
 *  charOffset  - used with offset to seed the multi-unit counter
 *                (offset - charOffset is passed to RestoreMultiUnits)
 *  isUtf8      - forwarded to SetIsUtf8
 *  grfscr      - script flags; only fscrIsModuleCode is examined here
 *  lineNumber  - initial value for both the current and the starting line
 *
 *  When scanning starts at offset 0, a leading byte order mark (U+FEFF), or
 *  its byte-swapped form (U+FFFE), is consumed before the first token.
 */
 template <typename EncodingPolicy>
 void Scanner<EncodingPolicy>::SetText(EncodedCharPtr pszSrc, size_t offset, size_t length, charcount_t charOffset, bool isUtf8, ULONG grfscr, ULONG lineNumber)
 {
     // Save the start of the script and add the offset to get the point where we should start scanning.
     m_pchBase = pszSrc;
     m_pchLast = m_pchBase + offset + length;
     m_pchPrevLine = m_currentCharacter = m_pchMinLine = m_pchMinTok = pszSrc + offset;

     this->RestoreMultiUnits(offset - charOffset);

     // Absorb any byte order mark at the start
     if(offset == 0)
     {
         switch( this->PeekFull(m_currentCharacter, m_pchLast) )
         {
         case 0xFFFE:    // "Opposite" endian BOM (U+FEFF with its bytes swapped)
             // We do not support big-endian encodings
             // fall-through

         case 0xFEFF:    // "Correct" BOM
             this->template ReadFull<true>(m_currentCharacter, m_pchLast);
             break;
         }
     }

     m_line = lineNumber;
     m_startLine = lineNumber;
     // Recorded after BOM absorption, so the starting line begins past any BOM.
     m_pchStartLine = m_currentCharacter;
     m_ptoken->tk = tkNone;
     m_fIsModuleCode = (grfscr & fscrIsModuleCode) != 0;
     m_fHadEol = FALSE;
     m_DeferredParseFlags = ScanFlagNone;

     this->SetIsUtf8(isUtf8);
 }

 #if ENABLE_BACKGROUND_PARSING
 // Requests the standard character sets for both the encoded character type and
 // char16 from the thread context. The results are discarded; the null pointer
 // arguments only select which overload is called.
 template <typename EncodingPolicy>
 void Scanner<EncodingPolicy>::PrepareForBackgroundParse(Js::ScriptContext *scriptContext)
 {
     auto *threadContext = scriptContext->GetThreadContext();

     threadContext->GetStandardChars(static_cast<EncodedChar*>(nullptr));
     threadContext->GetStandardChars(static_cast<char16*>(nullptr));
 }
 #endif

 //-----------------------------------------------------------------------------
 // Counts the code points from 'first' up to, but not including, the next
 // line terminator (LF, CR, LS, PS), embedded NUL, or 'last', whichever comes
 // first. On return *cb holds the number of encoded units covered by that span.
 //
 // This is used to determine a length of BSTR, which can't contain a NUL character.
 //-----------------------------------------------------------------------------
 template <typename EncodingPolicy>
 charcount_t Scanner<EncodingPolicy>::LineLength(EncodedCharPtr first, EncodedCharPtr last, size_t* cb)
 {
     Assert(cb != nullptr);

     charcount_t codePoints = 0;
     EncodedCharPtr cursor = first;

     while (true)
     {
         // Terminators can differ in encoded size (and may have several valid
         // encodings), so instead of subtracting a terminator length afterwards
         // we remember where each character starts before reading it.
         const EncodedCharPtr charStart = cursor;
         const auto ch = this->template ReadFull<false>(cursor, last);

         const bool endsLine =
             ch == kchNWL ||
             ch == kchRET ||
             ch == kchLS ||
             ch == kchPS ||
             ch == kchNUL;

         if (endsLine)
         {
             Assert(charStart >= first);
             *cb = charStart - first;
             return codePoints;
         }

         ++codePoints;
     }
 }

 // Walks the characters in [ichStart, ichEnd) starting at 'start' and advances
 // 'line' by the number of line terminators seen (CR LF counts once). Stops
 // early at a NUL character. Returns the character index just past the last
 // terminator seen, or ichStart when there was none.
 template <typename EncodingPolicy>
 charcount_t Scanner<EncodingPolicy>::UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd)
 {
     EncodedCharPtr cursor = start;
     charcount_t ichCurrent = ichStart;
     charcount_t ichLineStart = ichStart;
     int32 lineCount = line;

     while (ichCurrent < ichEnd)
     {
         ++ichCurrent;
         const auto ch = this->template ReadFull<false>(cursor, last);

         if (ch == kchNUL)
         {
             break;
         }

         if (ch == kchRET)
         {
             // Fold a following LF into the same line break.
             if (this->PeekFull(cursor, last) == kchNWL)
             {
                 ++ichCurrent;
                 this->template ReadFull<false>(cursor, last);
             }
         }
         else if (ch != kchNWL && ch != kchLS && ch != kchPS)
         {
             // Ordinary character: nothing to record.
             continue;
         }

         ++lineCount;
         ichLineStart = ichCurrent;
     }

     line = lineCount;
     return ichLineStart;
 }

 // Tries to read a unicode escape whose leading '\' has already been consumed.
 // Accepts 'u' followed by exactly four hex digits, or, when es6UnicodeMode is
 // on, 'u{' followed by one or more hex digits and a closing '}'. The value
 // must not exceed 0x10FFFF.
 //
 // On success stores the value in *outChar, advances startingLocation past the
 // escape and returns true. On failure returns false and leaves both untouched.
 template <typename EncodingPolicy>
 bool Scanner<EncodingPolicy>::TryReadEscape(EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar)
 {
     Assert(outChar != nullptr);
     Assert(startingLocation <= endOfSource);

     EncodedCharPtr cursor = startingLocation;

     // The '\' is assumed to have been consumed by the single caller; a 'u' must follow.
     if (cursor >= endOfSource || this->ReadFirst(cursor, endOfSource) != 'u')
     {
         return false;
     }

     const bool braced = cursor < endOfSource && this->PeekFirst(cursor, endOfSource) == '{' && es6UnicodeMode;
     if (braced)
     {
         // Step over the '{'.
         this->ReadFirst(cursor, endOfSource);
     }

     const uint digitLimit = (braced ? MAXUINT32 : 4u);
     codepoint_t value = 0x0;
     OLECHAR lastRead = 0;
     int digit = 0;
     uint digitsRead = 0;

     while (digitsRead < digitLimit && cursor < endOfSource)
     {
         lastRead = this->ReadFirst(cursor, endOfSource);
         if (!Js::NumberUtilities::FHexDigit(lastRead, &digit))
         {
             break;
         }

         value = value * 0x10 + digit;

         if (value > 0x10FFFF)
         {
             return false;
         }

         ++digitsRead;
     }

     // The braced form needs at least one digit; the plain form needs exactly four.
     const bool wrongDigitCount = braced ? (digitsRead == 0) : (digitsRead != 4);
     if (wrongDigitCount)
     {
         return false;
     }

     Assert(!braced || es6UnicodeMode);

     // In the braced form the character that ended the digit run must be '}'.
     if (braced && lastRead != '}')
     {
         return false;
     }

     *outChar = value;
     startingLocation = cursor;
     return true;
 }

 // Completes a code point whose first surrogate ('lower') has already been read.
 // If the next character satisfies IsSurrogateUpperPart, the pair is combined
 // into *outChar and startingLocation is advanced past it; otherwise *outChar is
 // just 'lower', the multi-unit counter is restored and startingLocation is left
 // alone. Always returns true.
 template <typename EncodingPolicy>
 template <bool bScan>
 bool Scanner<EncodingPolicy>::TryReadCodePointRest(codepoint_t lower, EncodedCharPtr& startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar)
 {
     Assert(outChar != nullptr);
     Assert(outContainsMultiUnitChar != nullptr);
     Assert(es6UnicodeMode);
     Assert(Js::NumberUtilities::IsSurrogateLowerPart(lower));

     // Default result: the lone first surrogate.
     *outChar = lower;

     EncodedCharPtr cursor = startingLocation;
     if (cursor >= endOfSource)
     {
         return true;
     }

     const size_t savedMultiUnits = this->m_cMultiUnits;
     const codepoint_t upper = this->template ReadFull<bScan>(cursor, endOfSource);

     if (!Js::NumberUtilities::IsSurrogateUpperPart(upper))
     {
         // Not a pair: undo any multi-unit accounting done by the speculative read.
         this->RestoreMultiUnits(savedMultiUnits);
         return true;
     }

     *outChar = Js::NumberUtilities::SurrogatePairAsCodePoint(lower, upper);

     if (this->IsMultiUnitChar(static_cast<OLECHAR>(upper)))
     {
         *outContainsMultiUnitChar = true;
     }

     startingLocation = cursor;
     return true;
 }

 // Reads one code point at startingLocation, advancing it.
 //   - Returns false without reading when startingLocation is at or past endOfSource.
 //   - A '\' that begins a valid unicode escape yields the escaped value and
 //     sets *hasEscape.
 //   - A big character sets *outContainsMultiUnitChar when it is a multi-unit
 //     character, and in es6UnicodeMode a leading surrogate is completed via
 //     TryReadCodePointRest.
 // The output flags are only ever set to true, never cleared.
 template <typename EncodingPolicy>
 template <bool bScan>
 inline bool Scanner<EncodingPolicy>::TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar)
 {
     Assert(outChar != nullptr);
     Assert(outContainsMultiUnitChar != nullptr);

     if (startingLocation >= endOfSource)
     {
         return false;
     }

     codepoint_t ch = this->template ReadFull<bScan>(startingLocation, endOfSource);

     if (!FBigChar(ch))
     {
         // Small character: the only special case is the start of an escape.
         if (ch == '\\' && TryReadEscape(startingLocation, endOfSource, &ch))
         {
             *hasEscape = true;
         }

         *outChar = ch;
         return true;
     }

     if (this->IsMultiUnitChar(static_cast<OLECHAR>(ch)))
     {
         *outContainsMultiUnitChar = true;
     }

     if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
     {
         return TryReadCodePointRest<bScan>(ch, startingLocation, endOfSource, outChar, outContainsMultiUnitChar);
     }

     *outChar = ch;
     return true;
 }

 // Scans an identifier beginning at *pp. The first code point (which may be
 // written as a \u escape) must satisfy IsIdStart; otherwise tkScanError is
 // returned. On a valid start the rest is handled by ScanIdentifierContinue.
 template <typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp)
 {
     EncodedCharPtr cursor = *pp;
     const EncodedCharPtr identStart = cursor;

     // JS6 allows unicode characters in the form of \uxxxx escape sequences
     // to be part of the identifier, so both facts are tracked while reading.
     bool sawEscape = false;
     bool sawMultiUnit = false;

     codepoint_t firstCodePoint = INVALID_CODEPOINT;
     const size_t savedMultiUnits = this->m_cMultiUnits;

     if (!TryReadCodePoint<true>(cursor, m_pchLast, &firstCodePoint, &sawEscape, &sawMultiUnit))
     {
         // Nothing could be read at all.
         return tkScanError;
     }

     Assert(firstCodePoint < 0x110000u);
     if (charClassifier->IsIdStart(firstCodePoint))
     {
         return ScanIdentifierContinue(identifyKwds, sawEscape, sawMultiUnit, identStart, cursor, pp);
     }

     // Not an identifier start: undo the multi-unit accounting of the read.
     this->RestoreMultiUnits(savedMultiUnits);
     return tkScanError;
 }

 // Fast path for identifier continuation, only taken for multi-unit encodings.
 // Advances p over single-unit characters accepted by IsIdContinueFast.
 // Returns TRUE when the identifier's end was found (a rejected character other
 // than '\', or 'last'), and FALSE when the caller must fall back to the slow
 // path (multi-unit character, '\', or a non multi-unit encoding).
 template <typename EncodingPolicy>
 BOOL Scanner<EncodingPolicy>::FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last)
 {
     if (!EncodingPolicy::MultiUnitEncoding)
     {
         // No fast path for non multi unit encoding
         return false;
     }

     for (; p < last; p++)
     {
         const EncodedChar unit = *p;
         if (this->IsMultiUnitChar(unit))
         {
             // Multi unit character: the identifier may continue, let the slow path decide.
             return FALSE;
         }

         const bool continuesId = charClassifier->IsIdContinueFast<false>(unit);
         Assert(unit != '\\' || !continuesId);
         if (!continuesId)
         {
             // A backslash may start an escape sequence, so only other
             // characters mark the end of the identifier.
             return unit != '\\';
         }
     }

     // Ran into the end of the source: the identifier ends here.
     return TRUE;
 }

 // Scans the remainder of an identifier whose start has already been consumed,
 // then classifies it.
 //
 //   identifyKwds  - when false, tkID is returned as soon as the extent is known
 //                   and the token is not filled in
 //   fHasEscape    - whether an escape sequence has been seen so far; may be
 //                   set further while scanning
 //   fHasMultiChar - whether a multi-unit character has been seen so far; may
 //                   be set further while scanning
 //   pchMin        - start of the identifier text
 //   p             - position just past what has been consumed so far
 //   pp            - receives the position just past the identifier
 //
 // Returns tkID, or tkNone when the identifier contained an escape and its name
 // maps to a token other than tkID (yield/await are still treated as tkID when
 // they are not keywords in the current region).
 template <typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar,
     EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp)
 {
     EncodedCharPtr last = m_pchLast;

     while (true)
     {
         // Fast path for utf8, non-multi unit char and not escape
         if (FastIdentifierContinue(p, last))
         {
             break;
         }

         // Slow path that has to deal with multi unit encoding
         codepoint_t codePoint = INVALID_CODEPOINT;
         // Snapshot the position and multi-unit count so the read can be undone.
         EncodedCharPtr pchBeforeLast = p;
         size_t multiUnitsBeforeLast = this->m_cMultiUnits;
         if (TryReadCodePoint<true>(p, last, &codePoint, &fHasEscape, &fHasMultiChar))
         {
             Assert(codePoint < 0x110000u);
             if (charClassifier->IsIdContinue(codePoint))
             {
                 continue;
             }
         }

         // Put back the last character
         p = pchBeforeLast;
         this->RestoreMultiUnits(multiUnitsBeforeLast);
         break;
     }

     Assert(p - pchMin > 0 && p - pchMin <= LONG_MAX);

     *pp = p;

     if (!identifyKwds)
     {
         return tkID;
     }

     // UTF16 Scanner are only for syntax coloring, so it shouldn't come here.
     if (EncodingPolicy::MultiUnitEncoding && !fHasMultiChar && !fHasEscape)
     {
         Assert(sizeof(EncodedChar) == 1);

         // If there is no escape, then the main scan loop would have found the keyword already,
         // so we can just assume it is an ID. The lookup below only runs in debug builds to verify that.
         DebugOnly(int32 cch = UnescapeToTempBuf(pchMin, p));
         DebugOnly(tokens tk = Ident::TkFromNameLen(m_tempChBuf.m_prgch, cch, IsStrictMode()));
         Assert(tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword()));

         // Record the raw source range on the token rather than creating a pid here.
         m_ptoken->SetIdentifier(reinterpret_cast<const char *>(pchMin), (int32)(p - pchMin));
         return tkID;
     }

     IdentPtr pid = PidOfIdentiferAt(pchMin, p, fHasEscape, fHasMultiChar);
     m_ptoken->SetIdentifier(pid);

     if (!fHasEscape)
     {
         // If it doesn't have escape, then Scan() should have taken care of keywords (except
         // yield if m_fYieldIsKeyword is false, in which case yield is treated as an identifier, and except
         // await if m_fAwaitIsKeyword is false, in which case await is treated as an identifier).
         // We don't have to check if the name is reserved word and return it as an Identifier
         Assert(pid->Tk(IsStrictMode()) == tkID
             || (pid->Tk(IsStrictMode()) == tkYIELD && !this->YieldIsKeyword())
             || (pid->Tk(IsStrictMode()) == tkAWAIT && !this->AwaitIsKeyword()));
         return tkID;
     }
     // With an escape present, a name that maps to any other token is reported as tkNone.
     tokens tk = pid->Tk(IsStrictMode());
     return tk == tkID || (tk == tkYIELD && !this->YieldIsKeyword()) || (tk == tkAWAIT && !this->AwaitIsKeyword()) ? tkID : tkNone;
 }

 // Returns the identifier for the source range [iecpMin, iecpLim), given as
 // encoded-unit offsets from the start of the source buffer. The range must be
 // non-empty and lie within AdjustedLength() (asserted).
 template <typename EncodingPolicy>
 IdentPtr Scanner<EncodingPolicy>::PidAt(size_t iecpMin, size_t iecpLim)
 {
     Assert(iecpMin < AdjustedLength() && iecpLim <= AdjustedLength() && iecpLim > iecpMin);

     const EncodedCharPtr rangeStart = m_pchBase + iecpMin;
     const EncodedCharPtr rangeEnd = m_pchBase + iecpLim;
     return PidOfIdentiferAt(rangeStart, rangeEnd);
 }

 // Decodes the source range [p, last) into m_tempChBuf, resolving unicode
 // escapes. Code points of 0x10000 and above are appended as a surrogate pair.
 // Returns the buffer's current index (m_ichCur) after decoding.
 template <typename EncodingPolicy>
 uint32 Scanner<EncodingPolicy>::UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last)
 {
     m_tempChBuf.Reset();

     for (EncodedCharPtr cursor = p; cursor < last; )
     {
         codepoint_t codePoint = 0;
         // Required by TryReadCodePoint's signature; the values are not used here.
         bool hasEscape = false;
         bool isMultiChar = false;

         bool gotCodePoint = TryReadCodePoint<false>(cursor, last, &codePoint, &hasEscape, &isMultiChar);
         Assert(gotCodePoint);
         Assert(codePoint < 0x110000);

         if (codePoint >= 0x10000)
         {
             char16 lower, upper;
             Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &upper);
             m_tempChBuf.AppendCh(lower);
             m_tempChBuf.AppendCh(upper);
         }
         else
         {
             m_tempChBuf.AppendCh((OLECHAR)codePoint);
         }
     }

     return m_tempChBuf.m_ichCur;
 }

 // Decodes [p, last) into the temporary buffer and returns the identifier
 // hashed from the decoded characters.
 template <typename EncodingPolicy>
 IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last)
 {
     const int32 decodedLength = UnescapeToTempBuf(p, last);

     // Read the buffer pointer only after decoding has filled the buffer.
     auto *hashTbl = this->GetHashTbl();
     return hashTbl->PidHashNameLen(m_tempChBuf.m_prgch, decodedLength);
 }

 // Returns the identifier for the source range [p, last).
 // When the text contains an escape sequence, or a multi-unit character in a
 // multi-unit encoding, it is first decoded through the temporary buffer.
 // Otherwise the raw source units are hashed directly, as char for a multi-unit
 // encoding and as char16 otherwise.
 template <typename EncodingPolicy>
 IdentPtr Scanner<EncodingPolicy>::PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar)
 {
     const bool needsDecoding = fHadEscape || (EncodingPolicy::MultiUnitEncoding && fHasMultiChar);
     if (needsDecoding)
     {
         return PidOfIdentiferAt(p, last);
     }

     if (!EncodingPolicy::MultiUnitEncoding)
     {
         Assert(sizeof(EncodedChar) == 2);
         return this->GetHashTbl()->PidHashNameLen(reinterpret_cast< const char16 * >(p), (int32)(last - p));
     }

     Assert(sizeof(EncodedChar) == 1);
     return this->GetHashTbl()->PidHashNameLen(reinterpret_cast<const char *>(p), reinterpret_cast<const char *>(last), (int32)(last - p));
 }

 // Scans a numeric literal starting at p.
 //
 //   p               - first character of the literal
 //   pdbl            - receives the parsed value
 //   likelyType      - set to Int on entry; set to Double for a leading "0"
 //                     followed by '.', 'e', 'E' or 'n', and passed to StrToDbl,
 //                     which may update it
 //   savedMultiUnits - multi-unit count restored before any error is raised
 //
 // Returns the position at which number parsing stopped (pchT). When a base
 // specifier (x/o/b) is not followed by any digit, the returned position is the
 // specifier itself and *pdbl is 0.
 //
 // Raises ERRIdAfterLit when the literal is immediately followed by an
 // identifier start (including a \u escape) and ERRbadNumber when it is
 // immediately followed by a digit.
 template <typename EncodingPolicy>
 typename Scanner<EncodingPolicy>::EncodedCharPtr Scanner<EncodingPolicy>::FScanNumber(EncodedCharPtr p, double *pdbl, LikelyNumberType& likelyType, size_t savedMultiUnits)
 {
     EncodedCharPtr last = m_pchLast;
     EncodedCharPtr pchT = nullptr;
     bool baseSpecified = false;
     likelyType = LikelyNumberType::Int;
     // Reset
     m_OctOrLeadingZeroOnLastTKNumber = false;

     // Run after each hex/octal/binary parse: detects the case where no digit
     // followed the base specifier (parsing stopped at p + 2).
     auto baseSpecifierCheck = [&pchT, &pdbl, p, &baseSpecified]()
     {
         if (pchT == p + 2)
         {
             // An octal token '0' was followed by a base specifier: /0[xXoObB]/
             // This literal can no longer be a double
             *pdbl = 0;
             // Advance the character pointer to the base specifier
             pchT = p + 1;
             // Set the flag so we know to offset the potential identifier search after the literal
             baseSpecified = true;
         }
     };

     if ('0' == this->PeekFirst(p, last))
     {
         switch(this->PeekFirst(p + 1, last))
         {
         case '.':
         case 'e':
         case 'E':
         case 'n':
             likelyType = LikelyNumberType::Double;
             // Floating point
             goto LFloat;

         case 'x':
         case 'X':
             // Hex
             *pdbl = Js::NumberUtilities::DblFromHex(p + 2, &pchT);
             baseSpecifierCheck();
             goto LIdCheck;
         case 'o':
         case 'O':
             // Octal
             *pdbl = Js::NumberUtilities::DblFromOctal(p + 2, &pchT);
             baseSpecifierCheck();
             goto LIdCheck;

         case 'b':
         case 'B':
             // Binary
             *pdbl = Js::NumberUtilities::DblFromBinary(p + 2, &pchT);
             baseSpecifierCheck();
             goto LIdCheck;

         default:
             // Octal
             *pdbl = Js::NumberUtilities::DblFromOctal(p, &pchT);
             Assert(pchT > p);

 #if !SOURCERELEASE
             // If an octal literal is malformed then it is in fact a decimal literal.
 #endif // !SOURCERELEASE
             if(*pdbl != 0 || pchT > p + 1)
                 m_OctOrLeadingZeroOnLastTKNumber = true; //report as an octal or hex for JSON when leading 0. Just '0' is ok

             // An '8' or '9' right after the octal digits means the literal is re-parsed as decimal.
             switch (*pchT)
             {
             case '8':
             case '9':
                 //            case 'e':
                 //            case 'E':
                 //            case '.':
                 m_OctOrLeadingZeroOnLastTKNumber = false;  //08...  or 09....
                 goto LFloat;
             }
             goto LIdCheck;
         }
     }
     else
     {
 LFloat:
         *pdbl = Js::NumberUtilities::StrToDbl(p, &pchT, likelyType, m_scriptContext->GetConfig()->IsESBigIntEnabled());
         Assert(pchT == p || !Js::NumberUtilities::IsNan(*pdbl));
         if (likelyType == LikelyNumberType::BigInt)
         {
             Assert(*pdbl == 0);
         }
         // fall through to LIdCheck
     }

 LIdCheck:
     // https://tc39.github.io/ecma262/#sec-literals-numeric-literals
     // The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit.
     // For example : 3in is an error and not the two input elements 3 and in
     // If a base was specified, use the first character denoting the constant. In this case, pchT is pointing to the base specifier.
     EncodedCharPtr startingLocation = baseSpecified ? pchT + 1 : pchT;
     codepoint_t outChar = *startingLocation;
     if (this->IsMultiUnitChar((OLECHAR)outChar))
     {
         // Decode the full character; this advances startingLocation.
         outChar = this->template ReadRest<true>((OLECHAR)outChar, startingLocation, last);
     }
     if (this->charClassifier->IsIdStart(outChar))
     {
         this->RestoreMultiUnits(savedMultiUnits);
         Error(ERRIdAfterLit);
     }

     // IsIdStart does not cover the unicode escape case. Try to read a unicode escape from the 'u' char.
     if (*pchT == '\\')
     {
         startingLocation++; // TryReadEscape expects us to point to the 'u', and since it is by reference we need to do it beforehand.
         if (TryReadEscape(startingLocation, m_pchLast, &outChar))
         {
             this->RestoreMultiUnits(savedMultiUnits);
             Error(ERRIdAfterLit);
         }
     }

     // NOTE(review): startingLocation may have been advanced by the reads above,
     // so this tests the character at the advanced position - confirm intended.
     if (Js::NumberUtilities::IsDigit(*startingLocation))
     {
         this->RestoreMultiUnits(savedMultiUnits);
         Error(ERRbadNumber);
     }

     return pchT;
 }

 // Calls RescanRegExp and, if it reports tkScanError, puts the current
 // character position back to where it was before the attempt.
 template <typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::TryRescanRegExp()
 {
     const EncodedCharPtr savedPosition = m_currentCharacter;

     const tokens result = RescanRegExp();
     if (result == tkScanError)
     {
         m_currentCharacter = savedPosition;
     }

     return result;
 }

 // Re-scans the current token, which must be tkDiv or tkAsgDiv, as a regular
 // expression literal. Scanning restarts at the token's first character, which
 // must be '/', and uses an arena allocator that lives only for this call.
 template <typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::RescanRegExp()
 {
 #if DEBUG
     {
         const auto currentKind = m_ptoken->tk;
         if (currentKind == tkDiv)
         {
             Assert(m_currentCharacter == m_pchMinTok + 1);
         }
         else if (currentKind == tkAsgDiv)
         {
             Assert(m_currentCharacter == m_pchMinTok + 2);
         }
         else
         {
             AssertMsg(FALSE, "Who is calling RescanRegExp?");
         }
     }
 #endif //DEBUG

     // Rewind to the start of the token and step over its leading slash.
     m_currentCharacter = m_pchMinTok;
     if ('/' != *m_currentCharacter)
     {
         Error(ERRnoSlash);
     }
     ++m_currentCharacter;

     auto *parserAllocator = m_parser->GetAllocator();
     ArenaAllocator alloc(_u("RescanRegExp"), parserAllocator->GetPageAllocator(), parserAllocator->outOfMemoryFunc);
     return ScanRegExpConstant(&alloc);
 }

 // Re-scans the current token, which must be tkDiv or tkAsgDiv, as a regular
 // expression literal through ScanRegExpConstantNoAST. Scanning restarts at the
 // token's first character, which must be '/', and uses an arena allocator that
 // lives only for this call.
 template <typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::RescanRegExpNoAST()
 {
 #if DEBUG
     {
         const auto currentKind = m_ptoken->tk;
         if (currentKind == tkDiv)
         {
             Assert(m_currentCharacter == m_pchMinTok + 1);
         }
         else if (currentKind == tkAsgDiv)
         {
             Assert(m_currentCharacter == m_pchMinTok + 2);
         }
         else
         {
             AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?");
         }
     }
 #endif //DEBUG

     // Rewind to the start of the token and step over its leading slash.
     m_currentCharacter = m_pchMinTok;
     if ('/' != *m_currentCharacter)
     {
         Error(ERRnoSlash);
     }
     ++m_currentCharacter;

     auto *parserAllocator = m_parser->GetAllocator();
     ArenaAllocator alloc(_u("RescanRegExp"), parserAllocator->GetPageAllocator(), parserAllocator->outOfMemoryFunc);
     return ScanRegExpConstantNoAST(&alloc);
 }

 // Re-scans the current token, which must be tkDiv or tkAsgDiv, as a regular
 // expression literal through ScanRegExpConstantNoAST. The allocator is a
 // temporary one borrowed from the current thread's ThreadContext and is
 // released again whether or not scanning throws.
 template <typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::RescanRegExpTokenizer()
 {
 #if DEBUG
     {
         const auto currentKind = m_ptoken->tk;
         if (currentKind == tkDiv)
         {
             Assert(m_currentCharacter == m_pchMinTok + 1);
         }
         else if (currentKind == tkAsgDiv)
         {
             Assert(m_currentCharacter == m_pchMinTok + 2);
         }
         else
         {
             AssertMsg(FALSE, "Who is calling RescanRegExpNoParseTree?");
         }
     }
 #endif //DEBUG

     // Rewind to the start of the token and step over its leading slash.
     m_currentCharacter = m_pchMinTok;
     if ('/' != *m_currentCharacter)
     {
         Error(ERRnoSlash);
     }
     ++m_currentCharacter;

     ThreadContext *threadContext = ThreadContext::GetContextForCurrentThread();
     threadContext->EnsureRecycler();
     Js::TempArenaAllocatorObject *tempAlloc = threadContext->GetTemporaryAllocator(_u("RescanRegExp"));

     tokens scanned = tkNone;
     TryFinally(
         [&]() /* try block */
         {
             scanned = this->ScanRegExpConstantNoAST(tempAlloc->GetAllocator());
         },
         [&](bool /* hasException */) /* finally block */
         {
             threadContext->ReleaseTemporaryAllocator(tempAlloc);
         });

     return scanned;
 }

 // Parses the regular expression literal that starts at m_currentCharacter
 // (just past the opening '/'), compiles it, and stores the result in the
 // current token.
 //
 //   alloc - arena handed to the regex parser
 //
 // Returns the token kind produced by Token::SetRegex. On a regex parse error,
 // m_currentCharacter is advanced by the error's encoded position and the error
 // is raised through Error().
 template <typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::ScanRegExpConstant(ArenaAllocator* alloc)
 {
     PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);

     // SEE ALSO: RegexHelper::PrimCompileDynamic()

 #ifdef PROFILE_EXEC
     m_scriptContext->ProfileBegin(Js::RegexCompilePhase);
 #endif
     ArenaAllocator* ctAllocator = alloc;
     UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = m_scriptContext->GetThreadContext()->GetStandardChars((EncodedChar*)0);
     UnifiedRegex::StandardChars<char16>* standardChars = m_scriptContext->GetThreadContext()->GetStandardChars((char16*)0);
 #if ENABLE_REGEX_CONFIG_OPTIONS
     UnifiedRegex::DebugWriter *w = nullptr;
     if (REGEX_CONFIG_FLAG(RegexDebug))
         w = m_scriptContext->GetRegexDebugWriter();
     if (REGEX_CONFIG_FLAG(RegexProfile))
         m_scriptContext->GetRegexStatsDatabase()->BeginProfile();
 #endif
     UnifiedRegex::Node* root = nullptr;
     charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
     UnifiedRegex::RegexFlags flags = UnifiedRegex::NoRegexFlags;
     UnifiedRegex::Parser<EncodingPolicy, true> parser
             ( m_scriptContext
             , ctAllocator
             , standardEncodedChars
             , standardChars
             , this->IsUtf8()
 #if ENABLE_REGEX_CONFIG_OPTIONS
             , w
 #endif
             );
     try
     {
         root = parser.ParseLiteral(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars, flags);
     }
     // Catch by const reference so the exception object is not copied.
     catch (const UnifiedRegex::ParseError& e)
     {
 #ifdef PROFILE_EXEC
         m_scriptContext->ProfileEnd(Js::RegexCompilePhase);
 #endif
         // Point at the offending character before reporting the error.
         m_currentCharacter += e.encodedPos;
         Error(e.error);
     }

     UnifiedRegex::RegexPattern* pattern;
     if (m_parser->IsBackgroundParser())
     {
         // Avoid allocating pattern from recycler on background thread. The main thread will create the pattern
         // and hook it to this parse node.
         pattern = parser.template CompileProgram<false>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags);
     }
     else
     {
         pattern = parser.template CompileProgram<true>(root, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, flags);
     }
     this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits

     return m_ptoken->SetRegex(pattern, m_parser);
 }

 // Parses the regular expression literal that starts at m_currentCharacter
 // (just past the opening '/') without building an AST, and marks the current
 // token as tkRegExp. No pattern object is produced.
 //
 //   alloc - arena handed to the regex parser
 //
 // On a regex parse error, m_currentCharacter is advanced by the error's encoded
 // position and the error is raised through Error().
 template<typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::ScanRegExpConstantNoAST(ArenaAllocator* alloc)
 {
     PROBE_STACK_NO_DISPOSE(m_scriptContext, Js::Constants::MinStackRegex);

     ThreadContext *threadContext = m_scriptContext->GetThreadContext();
     UnifiedRegex::StandardChars<EncodedChar>* standardEncodedChars = threadContext->GetStandardChars((EncodedChar*)0);
     UnifiedRegex::StandardChars<char16>* standardChars = threadContext->GetStandardChars((char16*)0);
     charcount_t totalLen = 0, bodyChars = 0, totalChars = 0, bodyLen = 0;
     UnifiedRegex::Parser<EncodingPolicy, true> parser
             ( m_scriptContext
             , alloc
             , standardEncodedChars
             , standardChars
             , this->IsUtf8()
 #if ENABLE_REGEX_CONFIG_OPTIONS
             , 0
 #endif
             );
     try
     {
         parser.ParseLiteralNoAST(m_currentCharacter, m_pchLast, bodyLen, totalLen, bodyChars, totalChars);
     }
     // Catch by const reference so the exception object is not copied.
     catch (const UnifiedRegex::ParseError& e)
     {
         // Point at the offending character before reporting the error.
         m_currentCharacter += e.encodedPos;
         Error(e.error);
         // never reached
     }

     UnifiedRegex::RegexPattern* pattern = parser.template CompileProgram<false>(nullptr, m_currentCharacter, totalLen, bodyChars, bodyLen, totalChars, UnifiedRegex::NoRegexFlags);
     Assert(pattern == nullptr);  // BuildAST == false, CompileProgram should return nullptr
     this->RestoreMultiUnits(this->m_cMultiUnits + parser.GetMultiUnits()); // m_currentCharacter changed, sync MultiUnits

     return (m_ptoken->tk = tkRegExp);

 }

 // Scans the opening part of a string template: a string constant followed by
 // either a closing '`' (tkStrTmplBasic, no substitutions) or '${'
 // (tkStrTmplBegin, which also increments the template depth). Anything else is
 // reported through ScanError.
 template<typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::ScanStringTemplateBegin(EncodedCharPtr *pp)
 {
     // String template must begin with a string constant followed by '`' or '${'
     ScanStringConstant<true, true>('`', pp);

     EncodedCharPtr last = m_pchLast;
     const OLECHAR terminator = this->ReadFirst(*pp, last);

     if (terminator == '`')
     {
         // Simple string template - no substitutions
         return tkStrTmplBasic;
     }

     if (terminator == '$')
     {
         const OLECHAR afterDollar = this->ReadFirst(*pp, last);
         if (afterDollar == '{')
         {
             // Next token after expr should be tkStrTmplMid or tkStrTmplEnd.
             // In string template scanning mode, we expect the next char to be '}'
             // and will treat it as the beginning of tkStrTmplEnd or tkStrTmplMid
             m_fStringTemplateDepth++;

             // Regular string template begin - next is first substitution
             return tkStrTmplBegin;
         }
     }

     // Error - step back one so the pointer stays at the last character of the
     // error token instead of after it.
     (*pp)--;
     return ScanError(m_currentCharacter, tkStrTmplBegin);
 }

 // Scans a later part of a string template: a string constant followed by
 // either a closing '`' (tkStrTmplEnd, which also decrements the template
 // depth) or '${' (tkStrTmplMid). Anything else is reported through ScanError.
 template<typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp)
 {
     // String template middle and end tokens must begin with a string constant
     ScanStringConstant<true, true>('`', pp);

     EncodedCharPtr last = m_pchLast;
     const OLECHAR terminator = this->ReadFirst(*pp, last);

     if (terminator == '`')
     {
         // No longer in string template scanning mode
         m_fStringTemplateDepth--;

         // This is the last part of the template ...`
         return tkStrTmplEnd;
     }

     if (terminator == '$')
     {
         const OLECHAR afterDollar = this->ReadFirst(*pp, last);
         if (afterDollar == '{')
         {
             // This is just another middle part of the template }...${
             return tkStrTmplMid;
         }
     }

     // Error - step back one so the pointer stays at the last character of the
     // error token instead of after it.
     (*pp)--;
     return ScanError(m_currentCharacter, tkStrTmplEnd);
 }

 /*****************************************************************************
 *
 *  Parses a string constant. Note that the string value is stored in
 *  a volatile buffer (or allocated on the heap if too long), and thus
 *  the string should be saved off before the next token is scanned.
 *
 *  Template parameters:
 *    stringTemplateMode - scan the literal text of a string template: line
 *                         terminators are allowed, and scanning stops (without
 *                         consuming) at '`' or at "${".
 *    createRawString    - also collect the raw (unescaped) text in
 *                         m_tempChBufSecondary. Must equal stringTemplateMode
 *                         (enforced by the static_assert below).
 *
 *  Parameters:
 *    delim - the character that terminates the string in non-template mode.
 *    pp    - in: position of the first character to scan.
 *            out: position just past the closing delimiter, or (template mode)
 *            the position of the '`' / '$' that ended the literal text.
 *
 *  The cooked value is accumulated in m_tempChBuf. On success the function
 *  sets the token identifier (or NULL when pid creation is suppressed), resets
 *  m_scanState to ScanStateNormal, records m_doubleQuoteOnLastTkStrCon and
 *  returns tkStrCon. Malformed input is reported through Error().
 */

 template<typename EncodingPolicy>
 template<bool stringTemplateMode, bool createRawString>
 tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
 {
     static_assert((stringTemplateMode && createRawString) || (!stringTemplateMode && !createRawString), "stringTemplateMode and createRawString must have the same value");

     // ch    - cooked character to append to m_tempChBuf
     // rawch - raw character to append to m_tempChBufSecondary
     // c     - scratch character used while decoding escapes
     // wT    - numeric value of the most recently read hex/octal digit
     OLECHAR ch, c, rawch;
     int wT;
     EncodedCharPtr p = *pp;
     EncodedCharPtr last = m_pchLast;

     // Reset
     m_OctOrLeadingZeroOnLastTKNumber = false;
     m_EscapeOnLastTkStrCon = FALSE;

     m_tempChBuf.Reset();

     // Use template parameter to gate raw string creation.
     // If createRawString is false, all these operations should be no-ops
     if (createRawString)
     {
         m_tempChBufSecondary.Reset();
     }

     // Each iteration reads one character (or one escape sequence). Cases that
     // 'break' out of the switch append ch/rawch at the bottom of the loop;
     // cases that 'continue' append nothing to the cooked buffer.
     for (;;)
     {
         switch ((rawch = ch = this->ReadFirst(p, last)))
         {
         case kchRET:
             if (stringTemplateMode)
             {
                 if (this->PeekFirst(p, last) == kchNWL)
                 {
                     // Eat the <LF> char, ignore return
                     this->ReadFirst(p, last);
                 }

                 // Both <CR> and <CR><LF> are normalized to <LF> in template cooked and raw values
                 ch = rawch = kchNWL;
             }

 LEcmaLineBreak:
             // Fall through
         case kchNWL:
             if (stringTemplateMode)
             {
                 // Notify the scanner to update current line, number of lines etc
                 NotifyScannedNewLine();

                 // We haven't updated m_currentCharacter yet, so make sure the MinLine info is correct in case we error out.
                 m_pchMinLine = p;

                 break;
             }

             // Outside of template mode an unescaped line terminator ends the string prematurely.
             // There is no 'break' after Error(): presumably Error() does not return, otherwise
             // control would fall through into the quote cases below - TODO confirm.
             m_currentCharacter = p - 1;
             Error(ERRnoStrEnd);

         case '"':
         case '\'':
             // A quote terminates the string only when it matches the opening delimiter;
             // the other quote character is ordinary string content.
             if (ch == delim)
                 goto LBreak;
             break;

         case '`':
             // In string template scan mode, don't consume the '`' - we need to differentiate
             // between a closed string template and the expression open sequence - ${
             if (stringTemplateMode)
             {
                 p--;
                 goto LBreak;
             }

             // If we aren't scanning for a string template, do the default thing
             goto LMainDefault;

         case '$':
             // If we are parsing a string literal part of a string template, ${ indicates we need to switch
             // to parsing an expression.
             if (stringTemplateMode && this->PeekFirst(p, last) == '{')
             {
                 // Rewind to the $ and return
                 p--;
                 goto LBreak;
             }

             // If we aren't scanning for a string template, do the default thing
             goto LMainDefault;

         case kchNUL:
             // A NUL read once p has moved beyond last means the input ended before the
             // closing delimiter; any other NUL is appended like a regular character.
             // NOTE(review): this test is 'p > last' while the escaped-NUL case below uses
             // 'p >= last' - confirm the difference is intended.
             if (p > last)
             {
                 m_currentCharacter = p - 1;
                 Error(ERRnoStrEnd);
             }
             break;

         default:
 LMainDefault:
             if (this->IsMultiUnitChar(ch))
             {
                 if ((ch == kchLS || ch == kchPS))
                 {
                     goto LEcmaLineBreak;
                 }

                 // Decode the remaining units of the character; the decoded value may
                 // turn out to be one of the Unicode line terminators.
                 rawch = ch = this->template ReadRest<true>(ch, p, last);
                 switch (ch)
                 {
                 case kchLS: // 0x2028, classifies as new line
                 case kchPS: // 0x2029, classifies as new line
                     goto LEcmaLineBreak;
                 }
             }
             break;

         case kchBSL:
             // In raw mode '\\' is not an escape character, just add the char into the raw buffer.
             m_tempChBufSecondary.template AppendCh<createRawString>(ch);

             m_EscapeOnLastTkStrCon=TRUE;

             // In raw mode, we append the raw char itself and not the escaped value so save the char.
             rawch = ch = this->ReadFirst(p, last);
             // codePoint accumulates the value of \x, \uXXXX and \u{...} escapes.
             // errorType is the error reported when a case jumps to ReturnScanError.
             codepoint_t codePoint = 0;
             uint errorType = (uint)ERRbadHexDigit;
             switch (ch)
             {
             case 'b':
                 ch = 0x08;
                 break;
             case 't':
                 ch = 0x09;
                 break;
             case 'v':
                 ch = 0x0B; //Only in ES5 mode
                 break; //same as default
             case 'n':
                 ch = 0x0A;
                 break;
             case 'f':
                 ch = 0x0C;
                 break;
             case 'r':
                 ch = 0x0D;
                 break;
             case 'x':
                 // Insert the 'x' here before jumping to parse the hex digits.
                 m_tempChBufSecondary.template AppendCh<createRawString>(ch);

                 // 2 hex digits
                 // codePoint is still 0 here, so LTwoHex accumulates only the two low digits.
                 ch = 0;
                 goto LTwoHex;
             case 'u':
                 // Raw string just inserts a 'u' here.
                 m_tempChBufSecondary.template AppendCh<createRawString>(ch);

                 // Either four hex digits follow (\uXXXX) or, when es6UnicodeMode is set,
                 // a braced code point (\u{X...}).
                 ch = 0;
                 if (Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                     goto LFourHex;
                 else if (c != '{' || !this->es6UnicodeMode)
                     goto ReturnScanError;

                 Assert(c == '{');
                 // c should definitely be a '{' which should be appended to the raw string.
                 m_tempChBufSecondary.template AppendCh<createRawString>(c);

                 //At least one digit is expected
                 if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                 {
                     goto ReturnScanError;
                 }

                 m_tempChBufSecondary.template AppendCh<createRawString>(c);

                 codePoint = static_cast<codepoint_t>(wT);

                 // Accumulate further hex digits, rejecting the escape as soon as the
                 // value exceeds the largest code point (0x10FFFF).
                 while(Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                 {
                     m_tempChBufSecondary.template AppendCh<createRawString>(c);
                     codePoint <<= 4;
                     codePoint += static_cast<codepoint_t>(wT);

                     if (codePoint > 0x10FFFF)
                     {
                         errorType = (uint)ERRInvalidCodePoint;
                         goto ReturnScanError;
                     }
                 }

                 // The first non-hex character must close the brace.
                 if (c != '}')
                 {
                     errorType = (uint)ERRMissingCurlyBrace;
                     goto ReturnScanError;
                 }

                 Assert(codePoint <= 0x10FFFF);

                 if (codePoint >= 0x10000)
                 {
                     // The code point needs a surrogate pair: the first half is appended to
                     // the cooked buffer here, the second half (ch) at the bottom of the loop.
                     OLECHAR lower = 0;
                     Js::NumberUtilities::CodePointAsSurrogatePair(codePoint, &lower, &ch);
                     m_tempChBuf.AppendCh(lower);
                 }
                 else
                 {
                     ch = (char16)codePoint;
                 }

                 // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
                 if (createRawString)
                     rawch = c;

                 break;
 LFourHex:
                 // Reached with the first of four hex digits already read: c holds the
                 // character and wT its numeric value.
                 codePoint = 0x0;
                 // Append first hex digit character to the raw string.
                 m_tempChBufSecondary.template AppendCh<createRawString>(c);

                 codePoint += static_cast<codepoint_t>(wT * 0x1000);
                 if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                     goto ReturnScanError;

                 // Append fourth (or second) hex digit character to the raw string.
                 m_tempChBufSecondary.template AppendCh<createRawString>(c);

                 codePoint += static_cast<codepoint_t>(wT * 0x0100);

 LTwoHex:
                 // This code path doesn't expect curly.
                 if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                     goto ReturnScanError;

                 // Append first hex digit character to the raw string.
                 m_tempChBufSecondary.template AppendCh<createRawString>(c);

                 codePoint += static_cast<codepoint_t>(wT * 0x0010);

                 if (!Js::NumberUtilities::FHexDigit(c = this->ReadFirst(p, last), &wT))
                     goto ReturnScanError;

                 codePoint += static_cast<codepoint_t>(wT);

                 // The final digit is not appended to the raw buffer here; it is stored in
                 // rawch and appended at the bottom of the loop.
                 // In raw mode we want the last hex character or the closing curly. c should hold one or the other.
                 if (createRawString)
                     rawch = c;

                 if (codePoint < 0x10000)
                 {
                     ch = static_cast<OLECHAR>(codePoint);
                 }
                 else
                 {
                     goto ReturnScanError;
                 }
                 break;
             case '0':
             case '1':
             case '2':
             case '3':
                 // 1 to 3 octal digits

                 ch -= '0';

                 // Octal escape sequences are not allowed inside string template literals
                 if (stringTemplateMode)
                 {
                     // Only a lone \0 (not followed by another octal digit) is accepted.
                     c = this->PeekFirst(p, last);
                     if (ch != 0 || (c >= '0' && c <= '7'))
                     {
                         errorType = (uint)ERRES5NoOctal;
                         goto ReturnScanError;
                     }
                     break;
                 }

                 // Presumably char16 is unsigned, so the cast lets a single comparison reject
                 // characters on either side of '0'..'7' - TODO confirm.
                 wT = (c = this->ReadFirst(p, last)) - '0';
                 if ((char16)wT > 7)
                 {
                     // Not an octal digit: the escape is a single digit. The flag is set for
                     // \1..\3, and for \0 when the next character is '8' or '9'.
                     if (ch != 0 || ((char16)wT <= 9))
                     {
                         m_OctOrLeadingZeroOnLastTKNumber = true;
                     }
                     // Put back the character that is not part of the escape.
                     p--;
                     break;
                 }

                 m_OctOrLeadingZeroOnLastTKNumber = true;
                 ch = static_cast< OLECHAR >(ch * 8 + wT);
                 goto LOneOctal;
             case '4':
             case '5':
             case '6':
             case '7':
                 // 1 to 2 octal digits

                 // Octal escape sequences are not allowed inside string template literals
                 if (stringTemplateMode)
                 {
                     errorType = (uint)ERRES5NoOctal;
                     goto ReturnScanError;
                 }

                 ch -= '0';

                 m_OctOrLeadingZeroOnLastTKNumber = true;

 LOneOctal:
                 // Optionally consume one more octal digit.
                 wT = (c = this->ReadFirst(p, last)) - '0';
                 if ((char16)wT > 7)
                 {
                     // Put back the character that is not part of the escape.
                     p--;
                     break;
                 }

                 ch = static_cast< OLECHAR >(ch * 8 + wT);
                 break;

             case kchRET:        // 0xD
                 if (stringTemplateMode)
                 {
                     // If this is \<CR><LF> we can eat the <LF> right now
                     if (this->PeekFirst(p, last) == kchNWL)
                     {
                         // Eat the <LF> char, ignore return
                         this->ReadFirst(p, last);
                     }

                     // Both \<CR> and \<CR><LF> are normalized to \<LF> in template raw string
                     rawch = kchNWL;
                 }
                 // Fall through to the shared line-continuation handling.
             case kchLS:         // 0x2028, classifies as new line
             case kchPS:         // 0x2029, classifies as new line
             case kchNWL:        // 0xA
 LEcmaEscapeLineBreak:
                 if (stringTemplateMode)
                 {
                     // We're going to ignore the line continuation tokens for the cooked strings, but we need to append the token for raw strings
                     m_tempChBufSecondary.template AppendCh<createRawString>(rawch);

                     // Template literal strings ignore all escaped line continuation tokens
                     NotifyScannedNewLine();

                     // We haven't updated m_currentCharacter yet, so make sure the MinLine info is correct in case we error out.
                     m_pchMinLine = p;

                     continue;
                 }

                 // Non-template mode: a backslash followed by a line terminator is a line
                 // continuation. ScanNewLine works on m_currentCharacter, so sync it with p
                 // around the call; 'continue' skips the append so nothing is added to the string.
                 m_currentCharacter = p;
                 ScanNewLine(ch);
                 p = m_currentCharacter;
                 continue;

             case 0:
                 if (p >= last)
                 {
                     // The input ended right after the backslash.
                     errorType = (uint)ERRnoStrEnd;

                     // Shared error exit for the escape cases above: they jump here with
                     // errorType already set.
 ReturnScanError:
                     m_currentCharacter = p - 1;
                     Error(errorType);
                 }
                 else if (stringTemplateMode)
                 {
                     // Escaped null character is translated into 0x0030 for raw template literals
                     rawch = 0x0030;
                 }
                 break;

             default:
                 // Any other escaped character stands for itself.
                 if (this->IsMultiUnitChar(ch))
                 {
                     rawch = ch = this->template ReadRest<true>(ch, p, last);
                     switch (ch)
                     {
                     case kchLS:
                     case kchPS:
                         goto LEcmaEscapeLineBreak;
                     }
                 }
                 break;
             }
             break;
         }

         // Append the cooked character, and (when createRawString) the raw character.
         m_tempChBuf.AppendCh(ch);
         m_tempChBufSecondary.template AppendCh<createRawString>(rawch);
     }

 LBreak:
     // End of the string: decide whether to create an identifier (pid) for its value.
     bool createPid = true;

     if ((m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0)
     {
         createPid = false;

         // Even when pid creation is suppressed, one is still created for the exact text "use strict".
         if ((m_tempChBuf.m_ichCur == 10) && (0 == memcmp(_u("use strict"), m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur * sizeof(OLECHAR))))
         {
             createPid = true;
         }
     }

     if (createPid)
     {
         m_ptoken->SetIdentifier(this->GetHashTbl()->PidHashNameLen(m_tempChBuf.m_prgch, m_tempChBuf.m_ichCur));
     }
     else
     {
         m_ptoken->SetIdentifier(NULL);
     }

     m_scanState = ScanStateNormal;
     m_doubleQuoteOnLastTkStrCon = '"' == delim;
     // Report the final scan position back to the caller.
     *pp = p;

     return tkStrCon;
 }

 // Scans an ordinary (non-template) string constant: forwards to the templated
 // implementation with both string template mode and raw string creation off.
 template<typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp)
 {
     return this->template ScanStringConstant<false, false>(delim, pp);
 }

 /*****************************************************************************
 *
 *  Consume a C-style comment, scanning from *pp.
 *
 *  When the closing star-slash is found, *pp is set just past it and tkNone
 *  is returned. Every line terminator inside the comment sets m_fHadEol and
 *  is processed by ScanNewLine. If the input ends first, ERRnoCmtEnd is
 *  reported. *containTypeDef is always set to false.
 */
 template<typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef)
 {
     Assert(containTypeDef != nullptr);
     EncodedCharPtr cursor = *pp;
     *containTypeDef = false;
     EncodedCharPtr limit = m_pchLast;

     for (;;)
     {
         OLECHAR ch = this->ReadFirst(cursor, limit);
         bool sawLineBreak = false;

         if (ch == '*')
         {
             // A '*' directly followed by '/' closes the comment.
             if (*cursor == '/')
             {
                 *pp = cursor + 1;
                 return tkNone;
             }
         }
         else if (ch == kchLS || ch == kchPS || ch == kchRET || ch == kchNWL)
         {
             // ES 2015 11.3 Line Terminators (0x2028 and 0x2029 classify as new line too)
             sawLineBreak = true;
         }
         else if (ch == kchNUL)
         {
             // A NUL at or beyond the end of the input: the comment was never closed.
             if (cursor >= limit)
             {
                 m_currentCharacter = cursor - 1;
                 *pp = cursor - 1;
                 Error(ERRnoCmtEnd);
             }
         }
         else if (this->IsMultiUnitChar(ch))
         {
             // Decode the rest of the character; it may be a Unicode line terminator.
             ch = this->template ReadRest<true>(ch, cursor, limit);
             sawLineBreak = (ch == kchLS || ch == kchPS);
         }

         if (sawLineBreak)
         {
             // ScanNewLine works on m_currentCharacter, so sync it with the local
             // cursor around the call.
             m_fHadEol = TRUE;
             m_currentCharacter = cursor;
             ScanNewLine(ch);
             cursor = m_currentCharacter;
         }
     }
 }

 /*****************************************************************************
 *
 *  A line terminator 'ch' has just been read. If it is a '\r' immediately
 *  followed by '\n', the '\n' is consumed as well; the scanner's line
 *  bookkeeping is then updated through NotifyScannedNewLine.
 */
 template<typename EncodingPolicy>
 void Scanner<EncodingPolicy>::ScanNewLine(uint ch)
 {
     if (ch == '\r')
     {
         // Only look at the next character when it could complete a <CR><LF> pair.
         if (PeekNextChar() == '\n')
         {
             ReadNextChar();
         }
     }

     NotifyScannedNewLine();
 }

 /*****************************************************************************
 *
 *  Updates the scanner's line bookkeeping after a line terminator: the line
 *  count, the previous and current line start positions, and the multi-unit
 *  count recorded for the start of the current line.
 */
 template<typename EncodingPolicy>
 void Scanner<EncodingPolicy>::NotifyScannedNewLine()
 {
     // The old line start becomes the previous line start; this must happen
     // before m_pchMinLine is overwritten with the current position.
     m_pchPrevLine = m_pchMinLine;
     m_pchMinLine = m_currentCharacter;

     // Snapshot the multi-unit count as of the new line start.
     m_cMinLineMultiUnits = this->m_cMultiUnits;

     // One more line has been scanned.
     ++m_line;
 }

 /*****************************************************************************
 *
 *  Delivers a token stream.
 */


 // Scans the next token with the deferred-parse flags temporarily cleared to
 // ScanFlagNone, restoring them afterwards. When no flags are set this is
 // simply Scan().
 template<typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::ScanForcingPid()
 {
     // Nothing to override: scan directly.
     if (m_DeferredParseFlags == ScanFlagNone)
     {
         return Scan();
     }

     const BYTE savedFlags = m_DeferredParseFlags;
     m_DeferredParseFlags = ScanFlagNone;

     tokens scanned = tkEOF;
     TryFinally(
         [&]() /* try block */
         {
             scanned = this->Scan();
         },
         [&](bool) /* finally block */
         {
             // Put the saved flags back in the finally block.
             this->m_DeferredParseFlags = savedFlags;
         });

     return scanned;
 }

 // Scans the next token, asking ScanCore to identify keywords.
 template<typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::Scan()
 {
     const bool identifyKwds = true;
     return this->ScanCore(identifyKwds);
 }

 // Scans the next token, asking ScanCore not to identify keywords.
 template<typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::ScanNoKeywords()
 {
     const bool identifyKwds = false;
     return this->ScanCore(identifyKwds);
 }

 // Scans the next token without keyword identification; currently identical
 // to ScanNoKeywords().
 template<typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::ScanAhead()
 {
     return this->ScanNoKeywords();
 }

 template<typename EncodingPolicy>
 tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)
 {
     codepoint_t ch;
     OLECHAR firstChar;
     OLECHAR secondChar;
     EncodedCharPtr pchT;
     size_t multiUnits = 0;
     EncodedCharPtr p = m_currentCharacter;
     EncodedCharPtr last = m_pchLast;
     bool seenDelimitedCommentEnd = false;

     // store the last token
     m_tkPrevious = m_ptoken->tk;
     m_iecpLimTokPrevious = IecpLimTok();    // Introduced for use by lambda parsing to find correct span of expression lambdas
     m_ichLimTokPrevious = IchLimTok();
     size_t savedMultiUnits = this->m_cMultiUnits;

     if (p >= last)
     {
         m_pchMinTok = p;
         m_cMinTokMultiUnits = this->m_cMultiUnits;
         goto LEof;
     }
     tokens token;
     m_fHadEol = FALSE;
     CharTypes chType;
     charcount_t commentStartLine;

     if (m_scanState && *p != 0)
     {
         if (m_scanState == ScanStateStringTemplateMiddleOrEnd)
         {
             AssertMsg(m_fStringTemplateDepth > 0,
                 "Shouldn't be trying to parse a string template end or middle token if we aren't scanning a string template");

             m_scanState = ScanStateNormal;

             pchT = p;
             token = ScanStringTemplateMiddleOrEnd(&pchT);
             p = pchT;

             goto LDone;
         }
     }

     for (;;)
     {
 LLoop:
         m_pchMinTok = p;
         m_cMinTokMultiUnits = this->m_cMultiUnits;
         ch = this->ReadFirst(p, last);
 #if DEBUG
         chType = this->charClassifier->GetCharType((OLECHAR)ch);
 #endif
         switch (ch)
         {
         default:
             if (ch == kchLS ||
                 ch == kchPS )
             {
                 goto LNewLine;
             }
             {
                 BOOL isMultiUnit = this->IsMultiUnitChar((OLECHAR)ch);
                 if (isMultiUnit)
                 {
                     ch = this->template ReadRest<true>((OLECHAR)ch, p, last);
                 }

                 if (es6UnicodeMode && Js::NumberUtilities::IsSurrogateLowerPart(ch))
                 {
                     codepoint_t upper = this->PeekFull(p, last);

                     if (Js::NumberUtilities::IsSurrogateUpperPart(upper))
                     {
                         // Consume the rest of the utf8 bytes for the codepoint
                         OLECHAR decodedUpper = this->ReadSurrogatePairUpper(p, last);
                         Assert(decodedUpper == (OLECHAR) upper);
                         ch = Js::NumberUtilities::SurrogatePairAsCodePoint(ch, upper);
                     }
                 }

                 if (this->charClassifier->IsIdStart(ch))
                 {
                     // We treat IDContinue as an error.
                     token = ScanIdentifierContinue(identifyKwds, false, !!isMultiUnit, m_pchMinTok, p, &p);
                     break;
                 }
             }

             chType = this->charClassifier->GetCharType(ch);
             switch (chType)
             {
             case _C_WSP: continue;
             case _C_NWL: goto LNewLine;
             // All other types (except errors) are handled by the outer switch.
             }
             Assert(chType == _C_LET || chType == _C_ERR || chType == _C_UNK || chType == _C_BKQ || chType == _C_SHP || chType == _C_AT || chType == _C_DIG);
             m_currentCharacter = p - 1;
             Error(ERRillegalChar);
             continue;

         case '\0':
             // Put back the null in case we get called again.
             p--;
             if (p < last)
             {
                 // A \0 prior to the end of the text is an invalid character.
                 m_currentCharacter = p;
                 Error(ERRillegalChar);
             }
 LEof:
             Assert(p >= last);
             token = tkEOF;
             break;

         case 0x0009:
         case 0x000B:
         case 0x000C:
         case 0x0020:
             Assert(chType == _C_WSP);
             continue;

         case '.':
             if (!Js::NumberUtilities::IsDigit(*p))
             {
                 // Not a double
                 if (m_scriptContext->GetConfig()->IsES6SpreadEnabled() &&
                     this->PeekFirst(p, last) == '.' &&
                     this->PeekFirst(p + 1, last) == '.')
                 {
                     token = tkEllipsis;
                     p += 2;
                 }
                 else
                 {
                     token = tkDot;
                 }
                 break;
             }
             // May be a double, fall through
         case '0': case '1': case '2': case '3': case '4':
         case '5': case '6': case '7': case '8': case '9':
             {
                 double dbl;
                 Assert(chType == _C_DIG || chType == _C_DOT);
                 p = m_pchMinTok;
                 this->RestoreMultiUnits(m_cMinTokMultiUnits);
                 LikelyNumberType likelyType = LikelyNumberType::Int;
                 pchT = FScanNumber(p, &dbl, likelyType, savedMultiUnits);
                 if (p == pchT)
                 {
                     this->RestoreMultiUnits(savedMultiUnits);
                     Assert(this->PeekFirst(p, last) != '.');
                     Error(ERRbadNumber);
                 }
                 Assert(!Js::NumberUtilities::IsNan(dbl));
                 if (likelyType == LikelyNumberType::BigInt)
                 {
                     Assert(m_scriptContext->GetConfig()->IsESBigIntEnabled());
                     AssertOrFailFast(pchT - p < UINT_MAX);
                     token = tkBigIntCon;
                     m_ptoken->SetBigInt(this->GetHashTbl()->PidHashNameLen(p, pchT, (uint32) (pchT - p)));
                     p = pchT;
                     break;
                 }
                 p = pchT;

                 int32 value;
                 if ((likelyType == LikelyNumberType::Int) && Js::NumberUtilities::FDblIsInt32(dbl, &value))
                 {
                     m_ptoken->SetLong(value);
                     token = tkIntCon;
                 }
                 else
                 {
                     token = tkFltCon;
                     m_ptoken->SetDouble(dbl, likelyType == LikelyNumberType::Int);
                 }

                 break;
             }
         case '(': Assert(chType == _C_LPR); token = tkLParen; break;
         case ')': Assert(chType == _C_RPR); token = tkRParen; break;
         case ',': Assert(chType == _C_CMA); token = tkComma;  break;
         case ';': Assert(chType == _C_SMC); token = tkSColon; break;
         case '[': Assert(chType == _C_LBR); token = tkLBrack; break;
         case ']': Assert(chType == _C_RBR); token = tkRBrack; break;
         case '~': Assert(chType == _C_TIL); token = tkTilde;  break;
         case '?': Assert(chType == _C_QUE); token = tkQMark;  break;
         case '{': Assert(chType == _C_LC);  token = tkLCurly; break;

         // ES 2015 11.3 Line Terminators
         case '\r':
         case '\n':
         // kchLS:
         // kchPS:
 LNewLine:
             m_currentCharacter = p;
             ScanNewLine(ch);
             p = m_currentCharacter;
             m_fHadEol = TRUE;
             continue;

 LReserved:
             {
                 // We will derive the PID from the token
                 Assert(token < tkID);
                 m_ptoken->SetIdentifier(NULL);
                 goto LDone;
             }

 LEval:
             {
                 token = tkID;
                 if (!this->m_parser) goto LIdentifier;
                 m_ptoken->SetIdentifier(this->m_parser->GetEvalPid());
                 goto LDone;
             }

 LArguments:
             {
                 token = tkID;
                 if (!this->m_parser) goto LIdentifier;
                 m_ptoken->SetIdentifier(this->m_parser->GetArgumentsPid());
                 goto LDone;
             }

 LTarget:
             {
                 token = tkID;
                 if (!this->m_parser) goto LIdentifier;
                 m_ptoken->SetIdentifier(this->m_parser->GetTargetPid());
                 goto LDone;
             }

 #include "kwd-swtch.h"
         case 'A': case 'B': case 'C': case 'D': case 'E':
         case 'F': case 'G': case 'H': case 'I': case 'J':
         case 'K': case 'L': case 'M': case 'N': case 'O':
         case 'P': case 'Q': case 'R': case 'S': case 'T':
         case 'U': case 'V': case 'W': case 'X': case 'Y':
         case 'Z':
         // Lower-case letters handled in kwd-swtch.h above during reserved word recognition.
         case '$': case '_':
 LIdentifier:
             Assert(this->charClassifier->IsIdStart(ch));
             Assert(ch < 0x10000 && !this->IsMultiUnitChar((OLECHAR)ch));
             token = ScanIdentifierContinue(identifyKwds, false, false, m_pchMinTok, p, &p);
             break;

         case '`':
             Assert(chType == _C_BKQ);

             pchT = p;
             token = ScanStringTemplateBegin(&pchT);
             p = pchT;
             break;

         case '}':
             Assert(chType == _C_RC);
             token = tkRCurly;
             break;

         case '\\':
             pchT = p - 1;
             token = ScanIdentifier(identifyKwds, &pchT);
             if (tkScanError == token)
             {
                 m_currentCharacter = p;
                 Error(ERRillegalChar);
             }
             p = pchT;
             break;


         case ':':
             token = tkColon;
             break;

         case '=':
             token = tkAsg;
             switch (this->PeekFirst(p, last))
             {
             case '=':
                 p++;
                 token = tkEQ;
                 if (this->PeekFirst(p, last) == '=')
                 {
                     p++;
                     token = tkEqv;
                 }
                 break;
             case '>':
                 p++;
                 token = tkDArrow;
                 break;
             }
             break;
         case '!':
             token = tkBang;
             if (this->PeekFirst(p, last) == '=')
             {
                 p++;
                 token = tkNE;
                 if (this->PeekFirst(p, last) == '=')
                 {
                     p++;
                     token = tkNEqv;
                 }
             }
             break;
         case '+':
             token = tkAdd;
             switch (this->PeekFirst(p, last))
             {
             case '=':
                 p++;
                 token = tkAsgAdd;
                 break;
             case '+':
                 p++;
                 token = tkInc;
                 break;
             }
             break;
         case '-':
             token = tkSub;
             switch (this->PeekFirst(p, last))
             {
             case '=':
                 p++;
                 token = tkAsgSub;
                 break;
             case '-':
                 p++;
                 token = tkDec;
                 if (!m_fIsModuleCode)
                 {
                     // https://tc39.github.io/ecma262/#prod-annexB-MultiLineComment
                     // If there was a new line in the multi-line comment, the text after --> is a comment.
                     if ('>' == this->PeekFirst(p, last) && m_fHadEol)
                     {
                         goto LSkipLineComment;
                     }
                 }
                 break;
             }
             break;
         case '*':
             token = tkStar;
             switch(this->PeekFirst(p, last))
             {
             case '=' :
                 p++;
                 token = tkAsgMul;
                 break;
             case '*' :
                 if (!m_scriptContext->GetConfig()->IsES7ExponentiationOperatorEnabled())
                 {
                     break;
                 }
                 p++;
                 token = tkExpo;
                 if (this->PeekFirst(p, last) == '=')
                 {
                     p++;
                     token = tkAsgExpo;
                 }
             }
             break;
         case '/':
             token = tkDiv;
             switch(this->PeekFirst(p, last))
             {
             case '=':
                 p++;
                 token = tkAsgDiv;
                 break;
             case '/':
                 if (p >= last)
                 {
                     AssertMsg(!m_fIsModuleCode, "Do we have other line comment cases scanning pass last?");

                     // Effective source length may have excluded HTMLCommentSuffix "//... -->". If we are scanning
                     // those, we have passed "last" already. Move back and return EOF.
                     p = last;
                     goto LEof;
                 }
                 ch = *++p;
                 firstChar = (OLECHAR)ch;
 LSkipLineComment:
                 pchT = NULL;
                 for (;;)
                 {
                     switch ((ch = this->ReadFirst(p, last)))
                     {
                     case kchLS:         // 0x2028, classifies as new line
                     case kchPS:         // 0x2029, classifies as new line
 LEcmaCommentLineBreak:
                         // kchPS and kchLS are more than one unit in UTF-8.
                         if (pchT)
                         {
                             // kchPS and kchLS are more than one unit in UTF-8.
                             p = pchT;
                         }
                         else
                         {
                             // But only a single code unit in UTF16
                             p--;
                         }
                         this->RestoreMultiUnits(multiUnits);
                         goto LCommentLineBreak;

                     case kchNWL:
                     case kchRET:
                         p--;
 LCommentLineBreak:
                         // Subtract the comment length from the total char count for the purpose
                         // of deciding whether to defer AST and byte code generation.
                         m_parser->ReduceDeferredScriptLength((ULONG)(p - m_pchMinTok));
                         break;
                     case kchNUL:
                         // Because we used ReadFirst, p has already been advanced: the character we are looking at is actually at p - 1.
                         // If p == last, that character (at p - 1) is still within the source buffer, so we need to consider it part of the comment.
                         // Only if p > last have we passed the end of the source buffer, in which case we treat it as a line break.
                         if (p > last)
                         {
                             p--;
                             goto LCommentLineBreak;
                         }
                         continue;

                     default:
                         if (this->IsMultiUnitChar((OLECHAR)ch))
                         {
                             pchT = p - 1;
                             multiUnits = this->m_cMultiUnits;
                             switch (ch = this->template ReadRest<true>((OLECHAR)ch, p, last))
                             {
                                 case kchLS:
                                 case kchPS:
                                     goto LEcmaCommentLineBreak;
                             }
                         }
                         continue;
                     }

                     break;
                 }

                 continue;

             case '*':
                 ch = *++p;
                 firstChar = (OLECHAR)ch;
                 if ((p + 1) < last)
                 {
                     secondChar = (OLECHAR)(*(p + 1));
                 }
                 else
                 {
                     secondChar = '\0';
                 }

                 pchT = p;
                 commentStartLine = m_line;
                 bool containTypeDef;
                 if (tkNone == (token = SkipComment(&pchT, &containTypeDef)))
                 {
                     // Subtract the comment length from the total char count for the purpose
                     // of deciding whether to defer AST and byte code generation.
                     m_parser->ReduceDeferredScriptLength((ULONG)(pchT - m_pchMinTok));
                     p = pchT;
                     seenDelimitedCommentEnd = true;
                     goto LLoop;
                 }
                 p = pchT;
                 break;
             }
             break;
         case '%':
             Assert(chType == _C_PCT);
             token = tkPct;
             if (this->PeekFirst(p, last) == '=')
             {
                 p++;
                 token = tkAsgMod;
             }
             break;
         case '<':
             Assert(chType == _C_LT);
             token = tkLT;
             switch (this->PeekFirst(p, last))
             {
             case '=':
                 p++;
                 token = tkLE;
                 break;
             case '<':
                 p++;
                 token = tkLsh;
                 if (this->PeekFirst(p, last) == '=')
                 {
                     p++;
                     token = tkAsgLsh;
                     break;
                 }
                 break;
             case '!':
                 // ES 2015 B.1.3 -  HTML comments are only allowed when parsing non-module code.
                 if (!m_fIsModuleCode && this->PeekFirst(p + 1, last) == '-' && this->PeekFirst(p + 2, last) == '-')
                 {
                     // This is a "<!--" comment - treat as //
                     if (p >= last)
                     {
                         // Effective source length may have excluded HTMLCommentSuffix "<!-- ... -->". If we are scanning
                         // those, we have passed "last" already. Move back and return EOF.
                         p = last;
                         goto LEof;
                     }
                     firstChar = '!';
                     goto LSkipLineComment;
                 }
                 break;
             }
             break;
         case '>':
             Assert(chType == _C_GT);
             token = tkGT;
             switch (this->PeekFirst(p, last))
             {
             case '=':
                 p++;
                 token = tkGE;
                 break;
             case '>':
                 p++;
                 token = tkRsh;
                 switch (this->PeekFirst(p, last))
                 {
                 case '=':
                     p++;
                     token = tkAsgRsh;
                     break;
                 case '>':
                     p++;
                     token = tkRs2;
                     if (*p == '=')
                     {
                         p++;
                         token = tkAsgRs2;
                     }
                     break;
                 }
                 break;
             }
             break;
         case '^':
             Assert(chType == _C_XOR);
             token = tkXor;
             if (this->PeekFirst(p, last) == '=')
             {
                 p++;
                 token = tkAsgXor;
             }
             break;
         case '|':
             Assert(chType == _C_BAR);
             token = tkOr;
             switch (this->PeekFirst(p, last))
             {
             case '=':
                 p++;
                 token = tkAsgOr;
                 break;
             case '|':
                 p++;
                 token = tkLogOr;
                 break;
             }
             break;
         case '&':
             Assert(chType == _C_AMP);
             token = tkAnd;
             switch (this->PeekFirst(p, last))
             {
             case '=':
                 p++;
                 token = tkAsgAnd;
                 break;
             case '&':
                 p++;
                 token = tkLogAnd;
                 break;
             }
             break;
         case '\'':
         case '"':
             Assert(chType == _C_QUO || chType == _C_APO);
             pchT = p;
             token = this->ScanStringConstant((OLECHAR)ch, &pchT);
             p = pchT;
             break;
         }

         break;
     }

 LDone:
     m_currentCharacter = p;
     return (m_ptoken->tk = token);
 }

 // Hands the secondary temp buffer (m_prgch / m_ichCur) to the hash table's
 // PidHashNameLen and returns the resulting IdentPtr. Returns nullptr instead
 // when ScanFlagSuppressStrPid is set in m_DeferredParseFlags.
 template <typename EncodingPolicy>
 IdentPtr Scanner<EncodingPolicy>::GetSecondaryBufferAsPid()
 {
     const bool suppressPid = (m_DeferredParseFlags & ScanFlagSuppressStrPid) != 0;
     if (suppressPid)
     {
         // Pid creation has been suppressed for string content; nothing to hand back.
         return nullptr;
     }

     return this->GetHashTbl()->PidHashNameLen(m_tempChBufSecondary.m_prgch, m_tempChBufSecondary.m_ichCur);
 }

 // Writes the base-10 text of lw into the scanner's temp buffer (m_tempChBuf)
 // and returns a pointer to that buffer. The returned pointer aliases
 // m_tempChBuf.m_prgch, so later writes to the temp buffer overwrite it.
 template <typename EncodingPolicy>
 LPCOLESTR Scanner<EncodingPolicy>::StringFromLong(int32 lw)
 {
     const int decimalRadix = 10;

     _ltow_s(lw, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax, decimalRadix);

     return m_tempChBuf.m_prgch;
 }

 // Converts lw to its decimal string via StringFromLong and returns the
 // IdentPtr the hash table's PidHashName produces for that string.
 template <typename EncodingPolicy>
 IdentPtr Scanner<EncodingPolicy>::PidFromLong(int32 lw)
 {
     LPCOLESTR decimalText = StringFromLong(lw);
     return this->GetHashTbl()->PidHashName(decimalText);
 }

 // Formats dbl into the scanner's temp buffer (m_tempChBuf) using
 // Js::NumberUtilities::FDblToStr and returns a pointer to that buffer.
 // Reports ERRnoMemory through Error() when the conversion fails.
 template <typename EncodingPolicy>
 LPCOLESTR Scanner<EncodingPolicy>::StringFromDbl(double dbl)
 {
     const bool converted = !!Js::NumberUtilities::FDblToStr(dbl, m_tempChBuf.m_prgch, m_tempChBuf.m_cchMax);
     if (!converted)
     {
         Error(ERRnoMemory);
     }

     return m_tempChBuf.m_prgch;
 }

 // Converts dbl to a string via StringFromDbl and returns the IdentPtr the
 // hash table's PidHashName produces for that string.
 template <typename EncodingPolicy>
 IdentPtr Scanner<EncodingPolicy>::PidFromDbl(double dbl)
 {
     LPCOLESTR numberText = StringFromDbl(dbl);
     return this->GetHashTbl()->PidHashName(numberText);
 }


 // Convenience overload: captures the scanner state into restorePoint with a
 // zero function-id increment and a zero length decrement.
 template <typename EncodingPolicy>
 void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint)
 {
     const uint noFunctionIdIncrement = 0;
     const size_t noLengthDecr = 0;

     Capture(restorePoint, noFunctionIdIncrement, noLengthDecr);
 }

 // Records the scanner's current position into *restorePoint so that a later
 // SeekTo / SeekToForcingPid can return to it.
 //   restorePoint        - receives the snapshot.
 //   functionIdIncrement - stored verbatim in the snapshot.
 //   lengthDecr          - stored verbatim in the snapshot.
 template <typename EncodingPolicy>
 void Scanner<EncodingPolicy>::Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr)
 {
     RestorePoint& snapshot = *restorePoint;

     // Caller-supplied bookkeeping values.
     snapshot.functionIdIncrement = functionIdIncrement;
     snapshot.lengthDecr = lengthDecr;

     // Start of the current token.
     snapshot.m_ichMinTok = this->IchMinTok();
     snapshot.m_cMinTokMultiUnits = this->m_cMinTokMultiUnits;

     // Start of the current line.
     snapshot.m_ichMinLine = this->IchMinLine();
     snapshot.m_cMinLineMultiUnits = this->m_cMinLineMultiUnits;

     // Line counter and end-of-line flag.
     snapshot.m_line = this->m_line;
     snapshot.m_fHadEol = this->m_fHadEol;

 #ifdef DEBUG
     // Debug-only copy of the multi-unit count; SeekAndScan asserts against it.
     snapshot.m_cMultiUnits = this->m_cMultiUnits;
 #endif
 }

 // Rewinds the scanner to restorePoint and rescans the token there using the
 // regular Scan() path (SeekAndScan with forcePid == false).
 template <typename EncodingPolicy>
 void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint)
 {
     this->template SeekAndScan<false>(restorePoint);
 }

 // Rewinds the scanner to restorePoint and rescans the token there using
 // ScanForcingPid() (SeekAndScan with forcePid == true).
 template <typename EncodingPolicy>
 void Scanner<EncodingPolicy>::SeekToForcingPid(const RestorePoint& restorePoint)
 {
     this->template SeekAndScan<true>(restorePoint);
 }

 // Restores the scanner position saved in restorePoint, rescans one token, and
 // then reinstates the saved line number and end-of-line flag.
 //   forcePid - when true the token is rescanned with ScanForcingPid(),
 //              otherwise with Scan().
 template <typename EncodingPolicy>
 template <bool forcePid>
 void Scanner<EncodingPolicy>::SeekAndScan(const RestorePoint& restorePoint)
 {
     // Line start: saved character offset plus the saved multi-unit count.
     this->m_cMinLineMultiUnits = restorePoint.m_cMinLineMultiUnits;
     this->m_pchMinLine = this->m_pchBase + restorePoint.m_ichMinLine + restorePoint.m_cMinLineMultiUnits;

     // Token start: same arithmetic, using the token's saved values.
     this->m_currentCharacter = this->m_pchBase + restorePoint.m_ichMinTok + restorePoint.m_cMinTokMultiUnits;
     this->RestoreMultiUnits(restorePoint.m_cMinTokMultiUnits);

     // Rescan the token at the restored position.
     if (!forcePid)
     {
         this->Scan();
     }
     else
     {
         this->ScanForcingPid();
     }

     // These are assigned after the rescan so that the saved values are the ones left in place.
     this->m_fHadEol = restorePoint.m_fHadEol;
     this->m_line = restorePoint.m_line;

     this->m_parser->ReduceDeferredScriptLength(restorePoint.lengthDecr);

     Assert(this->m_cMultiUnits == restorePoint.m_cMultiUnits);
 }

 // Rewinds the scanner to restorePoint (see the single-argument SeekTo) and
 // then adds the function-id increment stored in the restore point to
 // *nextFunctionId.
 template <typename EncodingPolicy>
 void Scanner<EncodingPolicy>::SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId)
 {
     this->SeekTo(restorePoint);

     uint& functionId = *nextFunctionId;
     functionId += restorePoint.functionIdIncrement;
 }

 // Called by CompileScriptException::ProcessError to retrieve a BSTR for the line on which an error occurred.
 //
 //   ichMinLine - character offset of the first character of the line to extract.
 //   pbstrLine  - receives a BSTR allocated with SysAllocStringLen holding the line's text.
 //
 // Returns:
 //   E_POINTER     - pbstrLine is null.
 //   E_UNEXPECTED  - ichMinLine is negative or greater than AdjustedLength().
 //   E_OUTOFMEMORY - the BSTR could not be allocated.
 //   S_OK          - *pbstrLine holds the converted line.
 // *pbstrLine is not written on the E_POINTER / E_UNEXPECTED paths.
 template<typename EncodingPolicy>
 HRESULT Scanner<EncodingPolicy>::SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine)
 {
     if( !pbstrLine )
     {
         return E_POINTER;
     }

     // If we overflow the string, we have a serious problem...
     if (ichMinLine < 0 || static_cast<size_t>(ichMinLine) > AdjustedLength() )
     {
         return E_UNEXPECTED;
     }

     // Fast path: the requested line is the one the scanner is currently on, so its start pointer is
     // already known. Otherwise translate the character offset into an encoded-unit offset from the base.
     const bool isCurrentLine = static_cast<size_t>(ichMinLine) == IchMinLine();
     typename EncodingPolicy::EncodedCharPtr pStart = isCurrentLine
         ? m_pchMinLine
         : m_pchBase + this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, ichMinLine);

     // Determine the length by scanning for the next newline
     size_t cb = 0;
     charcount_t cch = LineLength(pStart, m_pchLast, &cb);
     Assert(cch <= LONG_MAX);

     // The end of the line is always measured from the line's own start. The previous code only did this
     // for the current line; for any other line it converted cch (the line's length in characters) as if
     // it were an absolute character offset from m_pchBase, yielding an end pointer unrelated to pStart.
     // NOTE(review): cb is presumed to be the line's length in encoded units from pStart, as implied by
     // the original "m_pchMinLine + cb" computation for the current line - confirm against LineLength.
     typename EncodingPolicy::EncodedCharPtr pEnd = pStart + cb;

     *pbstrLine = SysAllocStringLen(NULL, cch);
     if (!*pbstrLine)
     {
         return E_OUTOFMEMORY;
     }

     this->ConvertToUnicode(*pbstrLine, cch, pStart, pEnd);
     return S_OK;
 }

 // Explicit instantiation of the Scanner template for NotNullTerminatedUTF8EncodingPolicy.
 template class Scanner<NotNullTerminatedUTF8EncodingPolicy>;