// lib/Parser/Scan.h - external/github.com/Microsoft/ChakraCore - Git at Google

 //-------------------------------------------------------------------------------------------------------
 // Copyright (C) Microsoft. All rights reserved.
 // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
 //-------------------------------------------------------------------------------------------------------
 #pragma once

 #ifdef ENABLE_GLOBALIZATION
 namespace Js
 {
     class DelayLoadWindowsGlobalization;
 }
 #include "Windows.Globalization.h"
 #endif

 int CountNewlines(LPCOLESTR psz);

 class Parser;
 struct ParseContext;

 // A single token handed from the scanner to its consumer. `tk` identifies the token kind and
 // thereby selects which arm of the private payload union `u` is meaningful.
 struct Token
 {
 private:
     union
     {
         // Identifier / string payload. SetIdentifier(const char *, int32) leaves pid null and
         // records the raw source span instead; GetIdentifier() then falls back to
         // CreateIdentifier().
         struct
         {
             IdentPtr pid;
             const char * pchMin;
             int32 length;
         };
         // Integer constant payload (read when tk == tkIntCon).
         int32 lw;
         // Floating point constant payload (read when tk == tkFltCon).
         struct
         {
             double dbl;
             // maybeInt will be true if the number did not contain 'e', 'E' , or '.'
             // notably important in asm.js where the '.' has semantic importance
             bool maybeInt;
         };
         // Regular expression payload (read when tk == tkRegExp).
         UnifiedRegex::RegexPattern* pattern;
         // Character range payload.
         // NOTE(review): no accessor in this struct reads these two fields - confirm their users.
         struct
         {
             charcount_t ichMin;
             charcount_t ichLim;
         };
     } u;

     // Builds the identifier for a token whose pid was deferred (defined out of line).
     IdentPtr CreateIdentifier(HashTbl * hashTbl);
 public:
     // A fresh token starts out as tkLim; the payload union is left uninitialized.
     Token() : tk(tkLim) {}
     tokens tk;

     // Non-zero when the token kind is exactly tkID.
     BOOL IsIdentifier() const
     {
         const bool isId = (tk == tkID);
         return isId;
     }

     // Returns the pid stored for a string constant or any string-template piece.
     IdentPtr GetStr() const
     {
         Assert(tk == tkStrCon
             || tk == tkStrTmplBasic
             || tk == tkStrTmplBegin
             || tk == tkStrTmplMid
             || tk == tkStrTmplEnd);
         return u.pid;
     }

     // Returns the pid of an identifier or reserved word. When no pid has been stored yet,
     // the result of CreateIdentifier(hashTbl) is returned instead.
     IdentPtr GetIdentifier(HashTbl * hashTbl)
     {
         Assert(IsIdentifier() || IsReservedWord());
         if (!u.pid)
         {
             return CreateIdentifier(hashTbl);
         }
         return u.pid;
     }

     // Value of an integer constant token.
     int32 GetLong() const
     {
         Assert(tk == tkIntCon);
         return u.lw;
     }

     // Value of a floating point constant token.
     double GetDouble() const
     {
         Assert(tk == tkFltCon);
         return u.dbl;
     }

     // The maybeInt flag recorded together with a floating point constant token.
     bool GetDoubleMayBeInt() const
     {
         Assert(tk == tkFltCon);
         return u.maybeInt;
     }

     // Pattern attached to a regular expression token.
     UnifiedRegex::RegexPattern * GetRegex()
     {
         Assert(tk == tkRegExp);
         return u.pattern;
     }

     // NOTE: THESE ROUTINES DEPEND ON THE ORDER THAT OPERATORS
     // ARE DECLARED IN kwd-xxx.h FILES.

     // Keywords and future reserved words (does not include operators): every kind ordered
     // before tkID.
     BOOL IsReservedWord() const
     {
         const bool precedesIdentifier = (tk < tkID);
         return precedesIdentifier;
     }

     BOOL IsKeyword() const;

     // Reserved words that are not keywords: the range [tkENUM, tkSTATIC] in strict mode,
     // and only tkENUM otherwise.
     BOOL IsFutureReservedWord(const BOOL isStrictMode) const
     {
         const auto lastReserved = isStrictMode ? tkSTATIC : tkENUM;
         return tk >= tkENUM && tk <= lastReserved;
     }

     // Operators occupy the half-open range [tkComma, tkLParen).
     BOOL IsOperator() const
     {
         if (tk < tkComma)
         {
             return false;
         }
         return tk < tkLParen;
     }

     // Records the raw source span of an identifier and clears the pid so that it is created
     // on demand by GetIdentifier(). The UTF16 scanner is only used for syntax coloring, so
     // deferred pid creation is only supported for UTF8.
     void SetIdentifier(const char * pchMin, int32 len)
     {
         u.pid = nullptr;
         u.pchMin = pchMin;
         u.length = len;
     }

     // Stores an already created pid; the raw span pointer is cleared.
     void SetIdentifier(IdentPtr pid)
     {
         u.pid = pid;
         u.pchMin = nullptr;
     }

     // Stores the integer constant payload.
     void SetLong(int32 value)
     {
         u.lw = value;
     }

     // Stores the floating point payload together with its maybeInt flag.
     void SetDouble(double dbl, bool maybeInt)
     {
         u.dbl = dbl;
         u.maybeInt = maybeInt;
     }

     tokens SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser);
 };

 typedef BYTE UTF8Char;
 typedef UTF8Char* UTF8CharPtr;

 // Encoding policy for OLECHAR source buffers. Every character is exactly one unit, so all
 // multi-unit bookkeeping is a no-op and none of the readers consult 'last'.
 class NullTerminatedUnicodeEncodingPolicy
 {
 public:
     typedef OLECHAR EncodedChar;
     typedef const OLECHAR *EncodedCharPtr;

 protected:
     static const bool MultiUnitEncoding = false;
     static const size_t m_cMultiUnits = 0;

     // No OLECHAR is ever treated as the start of a multi-unit sequence.
     static BOOL IsMultiUnitChar(OLECHAR ch)
     {
         return FALSE;
     }

     // Returns the unit at p and advances p by one. 'last' is unused here (see the UTF8 policy
     // below for why the parameter exists).
     static OLECHAR ReadFirst(EncodedCharPtr &p, EncodedCharPtr last)
     {
         const OLECHAR unit = *p;
         ++p;
         return unit;
     }

     // There is never a tail to decode: the lead unit is handed back unchanged.
     template <bool bScan>
     static OLECHAR ReadRest(OLECHAR ch, EncodedCharPtr &p, EncodedCharPtr last)
     {
         return ch;
     }

     // Same as ReadFirst: one unit is one full character.
     template <bool bScan>
     static OLECHAR ReadFull(EncodedCharPtr &p, EncodedCharPtr last)
     {
         const OLECHAR unit = *p;
         ++p;
         return unit;
     }

     // Returns the unit at p without advancing.
     static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last)
     {
         return p[0];
     }

     // Returns the unit at p without advancing.
     static OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last)
     {
         return p[0];
     }

     // Not meaningful for this policy: asserts and returns 0xfffe.
     static OLECHAR ReadSurrogatePairUpper(const EncodedCharPtr&, const EncodedCharPtr& last)
     {
         AssertMsg(false, "method should not be called while scanning UTF16 string");
         return 0xfffe;
     }

     // Nothing to restore: this policy keeps no multi-unit count.
     static void RestoreMultiUnits(size_t multiUnits)
     {
     }

     // Character offsets and unit offsets coincide, so the offset is returned as is.
     static size_t CharacterOffsetToUnitOffset(EncodedCharPtr start, EncodedCharPtr current, EncodedCharPtr last, charcount_t offset)
     {
         return offset;
     }

     // Copies cch OLECHARs from start into pch; 'end' is not consulted.
     static void ConvertToUnicode(__out_ecount_full(cch) LPOLESTR pch, charcount_t cch, EncodedCharPtr start, EncodedCharPtr end)
     {
         Unused(end);
         const size_t cbToCopy = cch * sizeof(OLECHAR);
         js_memcpy_s(pch, cbToCopy, start, cbToCopy);
     }

 public:
     // No per-source state to reset.
     void Clear()
     {
     }

     // Ignored: this policy never reports UTF8.
     void SetIsUtf8(bool isUtf8)
     {
     }

     bool IsUtf8() const
     {
         return false;
     }
 };

 // Encoding policy for UTF-8 source buffers. A character may span several units (bytes);
 // m_cMultiUnits accumulates, for the scanned prefix, the number of units consumed by
 // ReadRest beyond each lead unit, so that "unit offset - m_cMultiUnits" yields a character
 // offset. When nullTerminated is false every reader checks p against 'last' and produces a
 // virtual 0 character at or past the end instead of dereferencing p.
 template <bool nullTerminated>
 class UTF8EncodingPolicyBase
 {
 public:
     typedef utf8char_t EncodedChar;
     typedef LPCUTF8 EncodedCharPtr;

 protected:
     static const bool MultiUnitEncoding = true;

     size_t m_cMultiUnits;
     utf8::DecodeOptions m_decodeOptions;

     UTF8EncodingPolicyBase() { Clear(); }

     // Any value above 0x7f is the lead unit of a multi-unit sequence.
     static BOOL IsMultiUnitChar(OLECHAR ch) { return ch > 0x7f; }

     // Reads the lead unit at p and advances p.
     // Note when nullTerminated is false we still need to increment the character pointer because the scanner "puts back" this virtual null character by decrementing the pointer
     static OLECHAR ReadFirst(EncodedCharPtr &p, EncodedCharPtr last) { return (nullTerminated || p < last) ? static_cast<OLECHAR>(*p++) : (p++, 0); }

     // Reads one complete character starting at p and advances p past it.
     // "bScan" indicates if this ReadFull is part of scanning. Pass true during scanning and ReadFull will update
     // related Scanner state. The caller is supposed to sync result "p" to Scanner's current position. Pass false
     // otherwise and this doesn't affect Scanner state.
     template <bool bScan>
     OLECHAR ReadFull(EncodedCharPtr &p, EncodedCharPtr last)
     {
         EncodedChar ch = (nullTerminated || p < last) ? *p++ : (p++, 0);
         return !IsMultiUnitChar(ch) ? static_cast<OLECHAR>(ch) : ReadRest<bScan>(ch, p, last);
     }

     // Re-reads a multi-unit character with doSecondSurrogatePair added to the decode options.
     // The option stays set in m_decodeOptions until ConvertToUnicode clears it.
     OLECHAR ReadSurrogatePairUpper(EncodedCharPtr &p, EncodedCharPtr last)
     {
         EncodedChar ch = (nullTerminated || p < last) ? *p++ : (p++, 0);
         Assert(IsMultiUnitChar(ch));
         this->m_decodeOptions |= utf8::DecodeOptions::doSecondSurrogatePair;
         return ReadRest<true>(ch, p, last);
     }

     // Returns the lead unit at p without advancing (0 when past 'last' in the
     // non-null-terminated configuration).
     static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last) { return (nullTerminated || p < last) ? static_cast<OLECHAR>(*p) : 0; }

     // Returns the complete character at p. p is taken by value, so the caller's position is
     // untouched, and the decode runs with bScan == false so m_cMultiUnits is not updated.
     OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last)
     {
         OLECHAR result = PeekFirst(p, last);
         if (IsMultiUnitChar(result))
         {
             result = ReadFull<false>(p, last);
         }
         return result;
     }

     // Decodes the tail of a multi-unit character whose lead unit 'ch' was already consumed.
     // "bScan" indicates if this ReadRest is part of scanning. Pass true during scanning and ReadRest will update
     // related Scanner state. The caller is supposed to sync result "p" to Scanner's current position. Pass false
     // otherwise and this doesn't affect Scanner state.
     template <bool bScan>
     OLECHAR ReadRest(OLECHAR ch, EncodedCharPtr &p, EncodedCharPtr last)
     {
         // Always captured so the local is never left uninitialized; only read when scanning.
         const EncodedCharPtr tailStart = p;
         OLECHAR result = utf8::DecodeTail(ch, p, last, m_decodeOptions);
         if (bScan)
         {
             // If we are scanning, update m_cMultiUnits counter.
             m_cMultiUnits += p - tailStart;
         }
         return result;
     }

     // Overwrites the multi-unit counter, e.g. after the scanner position was moved.
     void RestoreMultiUnits(size_t multiUnits) { m_cMultiUnits = multiUnits; }

     // Translates the character offset 'offset' into a unit offset relative to 'start'.
     // 'current' is the scanner's present position, for which m_cMultiUnits is valid.
     size_t CharacterOffsetToUnitOffset(EncodedCharPtr start, EncodedCharPtr current, EncodedCharPtr last, charcount_t offset)
     {
         // Note: current may be before or after last. If last is the null terminator, current should be within [start, last].
         // But if we excluded HTMLCommentSuffix for the source, last is before "// -->\0". Scanner may stop at null
         // terminator past last, then current is after last.
         Assert(current >= start);
         size_t currentUnitOffset = current - start;
         // The unit offset can never be smaller than the number of extra units counted so far.
         // Equality must be allowed: at the very start both values are 0.
         Assert(currentUnitOffset >= m_cMultiUnits);
         Assert(currentUnitOffset - m_cMultiUnits < LONG_MAX);
         charcount_t currentCharacterOffset = charcount_t(currentUnitOffset - m_cMultiUnits);

         // If the offset is the current character offset then just return the current unit offset.
         if (currentCharacterOffset == offset) return currentUnitOffset;

         // If we have not encountered any multi-unit characters and we are moving backward the
         // character index and unit index are 1:1 so just return offset
         if (m_cMultiUnits == 0 && offset <= currentCharacterOffset) return offset;

         // Use local decode options
         utf8::DecodeOptions decodeOptions = IsUtf8() ? utf8::doDefault : utf8::doAllowThreeByteSurrogates;

         if (offset > currentCharacterOffset)
         {
             // If we are looking for an offset past current, current must be within [start, last]. We don't expect seeking
             // scanner position past last.
             Assert(current <= last);

             // If offset > currentOffset we already know the current character offset. The unit offset is the
             // unit index of offset - currentOffset characters from current.
             charcount_t charsLeft = offset - currentCharacterOffset;
             return currentUnitOffset + utf8::CharacterIndexToByteIndex(current, last - current, charsLeft, decodeOptions);
         }

         // If all else fails calculate the index from the start of the buffer.
         return utf8::CharacterIndexToByteIndex(start, currentUnitOffset, offset, decodeOptions);
     }

     // Decodes the units in [start, end) into pch. doSecondSurrogatePair is cleared from
     // m_decodeOptions first.
     void ConvertToUnicode(__out_ecount_full(cch) LPOLESTR pch, charcount_t cch, EncodedCharPtr start, EncodedCharPtr end)
     {
         m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doSecondSurrogatePair);
         utf8::DecodeUnitsInto(pch, start, end, m_decodeOptions);
     }

 public:
     // Resets the multi-unit counter and returns to the default (non-UTF8) decode options.
     void Clear()
     {
         m_cMultiUnits = 0;
         m_decodeOptions = utf8::doAllowThreeByteSurrogates;
     }

     // If we get UTF8 source buffer, turn off doAllowThreeByteSurrogates but allow invalid WCHARs without replacing them with replacement 'g_chUnknown'.
     // Otherwise the two options are swapped back. The masking is parenthesized explicitly:
     // first clear one option, then set the other.
     void SetIsUtf8(bool isUtf8)
     {
         if (isUtf8)
         {
             m_decodeOptions = (utf8::DecodeOptions)((m_decodeOptions & ~utf8::doAllowThreeByteSurrogates) | utf8::doAllowInvalidWCHARs);
         }
         else
         {
             m_decodeOptions = (utf8::DecodeOptions)((m_decodeOptions & ~utf8::doAllowInvalidWCHARs) | utf8::doAllowThreeByteSurrogates);
         }
     }

     // The source counts as UTF8 exactly when doAllowThreeByteSurrogates is not set.
     bool IsUtf8() const { return (m_decodeOptions & utf8::doAllowThreeByteSurrogates) == 0; }
 };

 typedef UTF8EncodingPolicyBase<false> NotNullTerminatedUTF8EncodingPolicy;

 // Encoding-independent view of a scanner for error reporting. Scanner<EncodingPolicy>
 // derives from this interface.
 interface IScanner
 {
     // Outputs the error's min/lim character offsets, the line, and the character offset of
     // the start of that line.
     virtual void GetErrorLineInfo(__out int32& ichMin, __out int32& ichLim, __out int32& line, __out int32& ichMinLine) = 0;
     // Hands back a BSTR through pbstrLine and reports success/failure as an HRESULT.
     // NOTE(review): presumably the text of the line starting at character offset ichMinLine,
     // owned by the caller afterwards - confirm against the implementation.
     virtual HRESULT SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine) = 0;
 };

 // Flags that can be provided to the Scan functions.
 // These can be bitwise OR'ed.
 enum ScanFlag
 {
     // No special behavior requested.
     ScanFlagNone = 0,
     // Set by Scanner::SetDeferredParse(TRUE) and cleared by SetDeferredParse(FALSE).
     // NOTE(review): the name says pid creation for strings is suppressed, while the comment
     // previously attached here read "Force strings to always have pid" - confirm which is meant.
     ScanFlagSuppressStrPid = 1,
 };

 typedef HRESULT (*CommentCallback)(void *data, OLECHAR firstChar, OLECHAR secondChar, bool containTypeDef, charcount_t min, charcount_t lim, bool adjacent, bool multiline, charcount_t startLine, charcount_t endLine);

 // Restore point defined using a relative offset rather than a pointer.
 struct RestorePoint
 {
     Field(charcount_t) m_ichMinTok;
     Field(charcount_t) m_ichMinLine;
     Field(size_t) m_cMinTokMultiUnits;
     Field(size_t) m_cMinLineMultiUnits;
     Field(charcount_t) m_line;
     Field(uint) functionIdIncrement;
     Field(size_t) lengthDecr;
     Field(BOOL) m_fHadEol;

 #ifdef DEBUG
     Field(size_t) m_cMultiUnits;
 #endif

     RestorePoint()
         : m_ichMinTok((charcount_t)-1),
           m_ichMinLine((charcount_t)-1),
           m_cMinTokMultiUnits((size_t)-1),
           m_cMinLineMultiUnits((size_t)-1),
           m_line((charcount_t)-1),
           functionIdIncrement(0),
           lengthDecr(0),
           m_fHadEol(FALSE)
 #ifdef DEBUG
           , m_cMultiUnits((size_t)-1)
 #endif
     {
     };
 };

 template <typename EncodingPolicy>
 class Scanner : public IScanner, public EncodingPolicy
 {
     friend Parser;
     typedef typename EncodingPolicy::EncodedChar EncodedChar;
     typedef typename EncodingPolicy::EncodedCharPtr EncodedCharPtr;

 public:
     Scanner(Parser* parser, Token *ptoken, Js::ScriptContext *scriptContext);
     ~Scanner(void);

     tokens Scan();
     tokens ScanNoKeywords();
     tokens ScanForcingPid();
     void SetText(EncodedCharPtr psz, size_t offset, size_t length, charcount_t characterOffset, bool isUtf8, ULONG grfscr, ULONG lineNumber = 0);
 #if ENABLE_BACKGROUND_PARSING
     void PrepareForBackgroundParse(Js::ScriptContext *scriptContext);
 #endif
     enum ScanState
     {
         ScanStateNormal = 0,
         ScanStateStringTemplateMiddleOrEnd = 1,
     };

     ScanState GetScanState() { return m_scanState; }
     void SetScanState(ScanState state) { m_scanState = state; }

     bool SetYieldIsKeywordRegion(bool fYieldIsKeywordRegion)
     {
         bool fPrevYieldIsKeywordRegion = m_fYieldIsKeywordRegion;
         m_fYieldIsKeywordRegion = fYieldIsKeywordRegion;
         return fPrevYieldIsKeywordRegion;
     }
     bool YieldIsKeywordRegion()
     {
         return m_fYieldIsKeywordRegion;
     }
     bool YieldIsKeyword()
     {
         return YieldIsKeywordRegion() || this->IsStrictMode();
     }

     bool SetAwaitIsKeywordRegion(bool fAwaitIsKeywordRegion)
     {
         bool fPrevAwaitIsKeywordRegion = m_fAwaitIsKeywordRegion;
         m_fAwaitIsKeywordRegion = fAwaitIsKeywordRegion;
         return fPrevAwaitIsKeywordRegion;
     }
     bool AwaitIsKeywordRegion()
     {
         return m_fAwaitIsKeywordRegion;
     }

     bool AwaitIsKeyword()
     {
         return AwaitIsKeywordRegion() || this->m_fIsModuleCode;
     }

     tokens TryRescanRegExp();
     tokens RescanRegExp();
     tokens RescanRegExpNoAST();
     tokens RescanRegExpTokenizer();

     BOOL FHadNewLine(void)
     {
         return m_fHadEol;
     }
     IdentPtr PidFromLong(int32 lw);
     IdentPtr PidFromDbl(double dbl);

     LPCOLESTR StringFromLong(int32 lw);
     LPCOLESTR StringFromDbl(double dbl);

     IdentPtr GetSecondaryBufferAsPid();

     BYTE SetDeferredParse(BOOL defer)
     {
         BYTE fOld = m_DeferredParseFlags;
         if (defer)
         {
             m_DeferredParseFlags |= ScanFlagSuppressStrPid;
         }
         else
         {
             m_DeferredParseFlags = ScanFlagNone;
         }
         return fOld;
     }

     void SetDeferredParseFlags(BYTE flags)
     {
         m_DeferredParseFlags = flags;
     }

     // the functions IsDoubleQuoteOnLastTkStrCon() and IsHexOrOctOnLastTKNumber() works only with a scanner without lookahead
     // Both functions are used to get more info on the last token for specific diffs necessary for JSON parsing.


     //Single quotes are not legal in JSON strings. Make distinction between single quote string constant and single quote string
     BOOL IsDoubleQuoteOnLastTkStrCon()
     {
         return m_doubleQuoteOnLastTkStrCon;
     }

     // True if all chars of last string constant are ascii
     BOOL IsEscapeOnLastTkStrCon()
     {
       return m_EscapeOnLastTkStrCon;
     }


     bool IsOctOrLeadingZeroOnLastTKNumber()
     {
         return m_OctOrLeadingZeroOnLastTKNumber;
     }

     // Returns the character offset of the first token. The character offset is the offset the first character of the token would
     // have if the entire file was converted to Unicode (UTF16-LE).
     charcount_t IchMinTok(void) const
     {
         Assert(m_pchMinTok - m_pchBase >= 0);
         Assert(m_pchMinTok - m_pchBase <= LONG_MAX);
         Assert(static_cast<charcount_t>(m_pchMinTok - m_pchBase) >= m_cMinTokMultiUnits);
         return static_cast<charcount_t>(m_pchMinTok - m_pchBase - m_cMinTokMultiUnits);
     }

     // Returns the character offset of the character immediately following the token. The character offset is the offset the first
     // character of the token would have if the entire file was converted to Unicode (UTF16-LE).
     charcount_t IchLimTok(void) const
     {

         Assert(m_currentCharacter - m_pchBase >= 0);
         Assert(m_currentCharacter - m_pchBase <= LONG_MAX);
         Assert(static_cast<charcount_t>(m_currentCharacter - m_pchBase) >= this->m_cMultiUnits);
         return static_cast<charcount_t>(m_currentCharacter - m_pchBase - this->m_cMultiUnits);
     }

     void SetErrorPosition(charcount_t ichMinError, charcount_t ichLimError)
     {
         Assert(ichLimError > 0 || ichMinError == 0);
         m_ichMinError = ichMinError;
         m_ichLimError = ichLimError;
     }

     charcount_t IchMinError(void) const
     {
         return m_ichLimError ? m_ichMinError : IchMinTok();
     }

     charcount_t IchLimError(void) const
     {
         return m_ichLimError ? m_ichLimError : IchLimTok();
     }

     // Returns the encoded unit offset of first character of the token. For example, in a UTF-8 encoding this is the offset into
     // the UTF-8 buffer. In Unicode this is the same as IchMinTok().
     size_t IecpMinTok(void) const
     {
         return static_cast< size_t >(m_pchMinTok  - m_pchBase);
     }

     // Returns the encoded unit offset of the character immediately following the token. For example, in a UTF-8 encoding this is
     // the offset into the UTF-8 buffer. In Unicode this is the same as IchLimTok().
     size_t IecpLimTok(void) const
     {
         return static_cast< size_t >(m_currentCharacter - m_pchBase);
     }

     size_t IecpLimTokPrevious() const
     {
         AssertMsg(m_iecpLimTokPrevious != (size_t)-1, "IecpLimTokPrevious() cannot be called before scanning a token");
         return m_iecpLimTokPrevious;
     }

     charcount_t IchLimTokPrevious() const
     {
         AssertMsg(m_ichLimTokPrevious != (charcount_t)-1, "IchLimTokPrevious() cannot be called before scanning a token");
         return m_ichLimTokPrevious;
     }

     IdentPtr PidAt(size_t iecpMin, size_t iecpLim);

     // Returns the character offset within the stream of the first character on the current line.
     charcount_t IchMinLine(void) const
     {
         Assert(m_pchMinLine - m_pchBase >= 0);
         Assert(m_pchMinLine - m_pchBase <= LONG_MAX);
         Assert(static_cast<charcount_t>(m_pchMinLine - m_pchBase) >= m_cMinLineMultiUnits);
         return static_cast<charcount_t>(m_pchMinLine - m_pchBase - m_cMinLineMultiUnits);
     }

     // Returns the current line number
     charcount_t LineCur(void) const { return m_line; }

     void SetCurrentCharacter(charcount_t offset, ULONG lineNumber = 0)
     {
         DebugOnly(m_iecpLimTokPrevious = (size_t)-1);
         DebugOnly(m_ichLimTokPrevious = (charcount_t)-1);
         size_t length = m_pchLast - m_pchBase;
         if (offset > length) offset = static_cast< charcount_t >(length);
         size_t ibOffset = this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, offset);
         m_currentCharacter = m_pchBase + ibOffset;
         Assert(ibOffset >= offset);
         this->RestoreMultiUnits(ibOffset - offset);
         m_line = lineNumber;
     }

     // IScanner methods
     virtual void GetErrorLineInfo(__out int32& ichMin, __out int32& ichLim, __out int32& line, __out int32& ichMinLine)
     {
         ichMin = this->IchMinError();
         ichLim = this->IchLimError();
         line   = this->LineCur();
         ichMinLine = this->IchMinLine();
         if (m_ichLimError && m_ichMinError < (charcount_t)ichMinLine)
         {
             line = m_startLine;
             ichMinLine = UpdateLine(line, m_pchStartLine, m_pchLast, 0, ichMin);
         }
     }

     virtual HRESULT SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine);

     class TemporaryBuffer
     {
         friend Scanner<EncodingPolicy>;

     private:
         // Keep a reference to the scanner.
         // We will use it to signal an error if we fail to allocate the buffer.
         Scanner<EncodingPolicy>* m_pscanner;
         uint32 m_cchMax;
         uint32 m_ichCur;
         __field_ecount(m_cchMax) OLECHAR *m_prgch;
         byte m_rgbInit[256];

     public:
         TemporaryBuffer()
         {
             m_pscanner = nullptr;
             m_prgch = (OLECHAR*)m_rgbInit;
             m_cchMax = _countof(m_rgbInit) / sizeof(OLECHAR);
             m_ichCur = 0;
         }

         ~TemporaryBuffer()
         {
             if (m_prgch != (OLECHAR*)m_rgbInit)
             {
                 free(m_prgch);
             }
         }

         void Reset()
         {
             m_ichCur = 0;
         }

         void Clear()
         {
             if (m_prgch != (OLECHAR*)m_rgbInit)
             {
                 free(m_prgch);
                 m_prgch = (OLECHAR*)m_rgbInit;
                 m_cchMax = _countof(m_rgbInit) / sizeof(OLECHAR);
             }
             Reset();
         }

         void AppendCh(uint ch)
         {
             return AppendCh<true>(ch);
         }

         template<bool performAppend> void AppendCh(uint ch)
         {
             if (performAppend)
             {
                 if (m_ichCur >= m_cchMax)
                 {
                     Grow();
                 }

                 Assert(m_ichCur < m_cchMax);
                 __analysis_assume(m_ichCur < m_cchMax);

                 m_prgch[m_ichCur++] = static_cast<OLECHAR>(ch);
             }
         }

     private:
         void Grow()
         {
             Assert(m_pscanner != nullptr);
             byte *prgbNew;
             byte *prgbOld = (byte *)m_prgch;

             ULONG cbNew;
             if (FAILED(ULongMult(m_cchMax, sizeof(OLECHAR) * 2, &cbNew)))
             {
                 m_pscanner->Error(ERRnoMemory);
             }

             if (prgbOld == m_rgbInit)
             {
                 if (nullptr == (prgbNew = static_cast<byte*>(malloc(cbNew))))
                     m_pscanner->Error(ERRnoMemory);
                 js_memcpy_s(prgbNew, cbNew, prgbOld, m_ichCur * sizeof(OLECHAR));
             }
             else if (nullptr == (prgbNew = static_cast<byte*>(realloc(prgbOld, cbNew))))
             {
                 m_pscanner->Error(ERRnoMemory);
             }

             m_prgch = (OLECHAR*)prgbNew;
             m_cchMax = cbNew / sizeof(OLECHAR);
         }
     };

     void Capture(_Out_ RestorePoint* restorePoint);
     void SeekTo(const RestorePoint& restorePoint);
     void SeekToForcingPid(const RestorePoint& restorePoint);

     void Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr);
     void SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId);

     void Clear();

     HashTbl * GetHashTbl() { return &m_htbl; }
 private:
     Parser *m_parser;
     HashTbl m_htbl;
     Token *m_ptoken;
     EncodedCharPtr m_pchBase;          // beginning of source
     EncodedCharPtr m_pchLast;          // The end of source
     EncodedCharPtr m_pchMinLine;       // beginning of current line
     EncodedCharPtr m_pchMinTok;        // beginning of current token
     EncodedCharPtr m_currentCharacter; // current character
     EncodedCharPtr m_pchPrevLine;      // beginning of previous line
     size_t m_cMinTokMultiUnits;        // number of multi-unit characters previous to m_pchMinTok
     size_t m_cMinLineMultiUnits;       // number of multi-unit characters previous to m_pchMinLine
     uint16 m_fStringTemplateDepth;     // we should treat } as string template middle starting character (depth instead of flag)
     BOOL m_fHadEol;
     BOOL m_fIsModuleCode : 1;
     BOOL m_doubleQuoteOnLastTkStrCon :1;
     bool m_OctOrLeadingZeroOnLastTKNumber :1;
     bool m_EscapeOnLastTkStrCon:1;
     BOOL m_fNextStringTemplateIsTagged:1;   // the next string template scanned has a tag (must create raw strings)
     BYTE m_DeferredParseFlags:2;            // suppressStrPid and suppressIdPid
     bool es6UnicodeMode;                // True if ES6Unicode Extensions are enabled.
     bool m_fYieldIsKeywordRegion;       // Whether to treat 'yield' as an identifier or keyword
     bool m_fAwaitIsKeywordRegion;       // Whether to treat 'await' as an identifier or keyword

     // Temporary buffer.
     TemporaryBuffer m_tempChBuf;
     TemporaryBuffer m_tempChBufSecondary;

     charcount_t m_line;
     ScanState m_scanState;

     charcount_t m_ichMinError;
     charcount_t m_ichLimError;
     charcount_t m_startLine;
     EncodedCharPtr m_pchStartLine;

     Js::ScriptContext* m_scriptContext;
     const Js::CharClassifier *charClassifier;

     tokens m_tkPrevious;
     size_t m_iecpLimTokPrevious;
     charcount_t m_ichLimTokPrevious;

     void ClearStates();

     template <bool forcePid>
     void SeekAndScan(const RestorePoint& restorePoint);

     tokens ScanCore(bool identifyKwds);
     tokens ScanAhead();

     tokens ScanError(EncodedCharPtr pchCur, tokens errorToken)
     {
         m_currentCharacter = pchCur;
         return m_ptoken->tk = tkScanError;
     }

     __declspec(noreturn) void Error(HRESULT hr)
     {
         m_pchMinTok = m_currentCharacter;
         m_cMinTokMultiUnits = this->m_cMultiUnits;
         throw ParseExceptionObject(hr);
     }

     const EncodedCharPtr PchBase(void) const
     {
         return m_pchBase;
     }
     const EncodedCharPtr PchMinTok(void)
     {
         return m_pchMinTok;
     }

     template<bool stringTemplateMode, bool createRawString> tokens ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp);
     tokens ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp);

     tokens ScanStringTemplateBegin(EncodedCharPtr *pp);
     tokens ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp);

     void ScanNewLine(uint ch);
     void NotifyScannedNewLine();
     charcount_t LineLength(EncodedCharPtr first, EncodedCharPtr last, size_t* cb);

     tokens ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp);
     BOOL FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last);
     tokens ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar, EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp);
     tokens SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef);
     tokens ScanRegExpConstant(ArenaAllocator* alloc);
     tokens ScanRegExpConstantNoAST(ArenaAllocator* alloc);
     EncodedCharPtr FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt);
     IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar);
     IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last);
     uint32 UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last);

     void SaveSrcPos(void)
     {
         m_pchMinTok = m_currentCharacter;
     }
     OLECHAR PeekNextChar(void)
     {
         return this->PeekFull(m_currentCharacter, m_pchLast);
     }
     OLECHAR ReadNextChar(void)
     {
         return this->template ReadFull<true>(m_currentCharacter, m_pchLast);
     }

     EncodedCharPtr AdjustedLast() const
     {
         return m_pchLast;
     }

     size_t AdjustedLength() const
     {
         return AdjustedLast() - m_pchBase;
     }

     bool IsStrictMode() const
     {
         return this->m_parser != NULL && this->m_parser->IsStrictMode();
     }

     // This function expects the first character to be a 'u'
     // It will attempt to return a codepoint represented by a single escape point (either of the form \uXXXX or \u{any number of hex characters, s.t. value < 0x110000}
     bool TryReadEscape(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar = nullptr);

     template <bool bScan>
     bool TryReadCodePointRest(codepoint_t lower, EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar);

     template <bool bScan>
     inline bool TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar);

     inline BOOL IsIdContinueNext(EncodedCharPtr startingLocation, EncodedCharPtr endOfSource)
     {
         codepoint_t nextCodepoint;
         bool ignore;

         if (TryReadCodePoint<false>(startingLocation, endOfSource, &nextCodepoint, &ignore, &ignore))
         {
             return charClassifier->IsIdContinue(nextCodepoint);
         }

         return false;
     }

     charcount_t UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd);
 };