| //------------------------------------------------------------------------------------------------------- |
| // Copyright (C) Microsoft. All rights reserved. |
| // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. |
| //------------------------------------------------------------------------------------------------------- |
| #pragma once |
| |
| #ifdef ENABLE_GLOBALIZATION |
| namespace Js |
| { |
| class DelayLoadWindowsGlobalization; |
| } |
| #include "Windows.Globalization.h" |
| #endif |
| |
| int CountNewlines(LPCOLESTR psz); |
| |
| class Parser; |
| struct ParseContext; |
| |
| struct Token |
| { |
| private: |
| union |
| { |
| struct |
| { |
| IdentPtr pid; |
| const char * pchMin; |
| int32 length; |
| }; |
| int32 lw; |
| struct |
| { |
| double dbl; |
| // maybeInt will be true if the number did not contain 'e', 'E' , or '.' |
| // notably important in asm.js where the '.' has semantic importance |
| bool maybeInt; |
| }; |
| UnifiedRegex::RegexPattern* pattern; |
| struct |
| { |
| charcount_t ichMin; |
| charcount_t ichLim; |
| }; |
| } u; |
| IdentPtr CreateIdentifier(HashTbl * hashTbl); |
| public: |
| Token() : tk(tkLim) {} |
| tokens tk; |
| |
| BOOL IsIdentifier() const |
| { |
| return tk == tkID; |
| } |
| |
| IdentPtr GetStr() const |
| { |
| Assert(tk == tkStrCon || tk == tkStrTmplBasic || tk == tkStrTmplBegin || tk == tkStrTmplMid || tk == tkStrTmplEnd); |
| return u.pid; |
| } |
| IdentPtr GetIdentifier(HashTbl * hashTbl) |
| { |
| Assert(IsIdentifier() || IsReservedWord()); |
| if (u.pid) |
| { |
| return u.pid; |
| } |
| return CreateIdentifier(hashTbl); |
| } |
| |
| int32 GetLong() const |
| { |
| Assert(tk == tkIntCon); |
| return u.lw; |
| } |
| |
| double GetDouble() const |
| { |
| Assert(tk == tkFltCon); |
| return u.dbl; |
| } |
| |
| bool GetDoubleMayBeInt() const |
| { |
| Assert(tk == tkFltCon); |
| return u.maybeInt; |
| } |
| UnifiedRegex::RegexPattern * GetRegex() |
| { |
| Assert(tk == tkRegExp); |
| return u.pattern; |
| } |
| |
| // NOTE: THESE ROUTINES DEPEND ON THE ORDER THAT OPERATORS |
| // ARE DECLARED IN kwd-xxx.h FILES. |
| |
| BOOL IsReservedWord() const |
| { |
| // Keywords and future reserved words (does not include operators) |
| return tk < tkID; |
| } |
| |
| BOOL IsKeyword() const; |
| |
| BOOL IsFutureReservedWord(const BOOL isStrictMode) const |
| { |
| // Reserved words that are not keywords |
| return tk >= tkENUM && tk <= (isStrictMode ? tkSTATIC : tkENUM); |
| } |
| |
| BOOL IsOperator() const |
| { |
| return tk >= tkComma && tk < tkLParen; |
| } |
| |
| // UTF16 Scanner are only for syntax coloring. Only support |
| // defer pid creation for UTF8 |
| void SetIdentifier(const char * pchMin, int32 len) |
| { |
| this->u.pid = nullptr; |
| this->u.pchMin = pchMin; |
| this->u.length = len; |
| } |
| void SetIdentifier(IdentPtr pid) |
| { |
| this->u.pid = pid; |
| this->u.pchMin = nullptr; |
| } |
| |
| void SetLong(int32 value) |
| { |
| this->u.lw = value; |
| } |
| |
| void SetDouble(double dbl, bool maybeInt) |
| { |
| this->u.dbl = dbl; |
| this->u.maybeInt = maybeInt; |
| } |
| |
| tokens SetRegex(UnifiedRegex::RegexPattern *const pattern, Parser *const parser); |
| }; |
| |
| typedef BYTE UTF8Char; |
| typedef UTF8Char* UTF8CharPtr; |
| |
| class NullTerminatedUnicodeEncodingPolicy |
| { |
| public: |
| typedef OLECHAR EncodedChar; |
| typedef const OLECHAR *EncodedCharPtr; |
| |
| protected: |
| static const bool MultiUnitEncoding = false; |
| static const size_t m_cMultiUnits = 0; |
| |
| static BOOL IsMultiUnitChar(OLECHAR ch) { return FALSE; } |
| // See comment below regarding unused 'last' parameter |
| static OLECHAR ReadFirst(EncodedCharPtr &p, EncodedCharPtr last) { return *p++; } |
| template <bool bScan> |
| static OLECHAR ReadRest(OLECHAR ch, EncodedCharPtr &p, EncodedCharPtr last) { return ch; } |
| template <bool bScan> |
| static OLECHAR ReadFull(EncodedCharPtr &p, EncodedCharPtr last) { return *p++; } |
| static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last) { return *p; } |
| static OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last) { return *p; } |
| |
| static OLECHAR ReadSurrogatePairUpper(const EncodedCharPtr&, const EncodedCharPtr& last) |
| { |
| AssertMsg(false, "method should not be called while scanning UTF16 string"); |
| return 0xfffe; |
| } |
| |
| static void RestoreMultiUnits(size_t multiUnits) { } |
| static size_t CharacterOffsetToUnitOffset(EncodedCharPtr start, EncodedCharPtr current, EncodedCharPtr last, charcount_t offset) { return offset; } |
| |
| static void ConvertToUnicode(__out_ecount_full(cch) LPOLESTR pch, charcount_t cch, EncodedCharPtr start, EncodedCharPtr end) |
| { |
| Unused(end); |
| js_memcpy_s(pch, cch * sizeof(OLECHAR), start, cch * sizeof(OLECHAR)); |
| } |
| |
| public: |
| void Clear() {} |
| void SetIsUtf8(bool isUtf8) { } |
| bool IsUtf8() const { return false; } |
| }; |
| |
| template <bool nullTerminated> |
| class UTF8EncodingPolicyBase |
| { |
| public: |
| typedef utf8char_t EncodedChar; |
| typedef LPCUTF8 EncodedCharPtr; |
| |
| protected: |
| static const bool MultiUnitEncoding = true; |
| |
| size_t m_cMultiUnits; |
| utf8::DecodeOptions m_decodeOptions; |
| |
| UTF8EncodingPolicyBase() { Clear(); } |
| |
| static BOOL IsMultiUnitChar(OLECHAR ch) { return ch > 0x7f; } |
| // Note when nullTerminated is false we still need to increment the character pointer because the scanner "puts back" this virtual null character by decrementing the pointer |
| static OLECHAR ReadFirst(EncodedCharPtr &p, EncodedCharPtr last) { return (nullTerminated || p < last) ? static_cast<OLECHAR>(*p++) : (p++, 0); } |
| |
| // "bScan" indicates if this ReadFull is part of scanning. Pass true during scanning and ReadFull will update |
| // related Scanner state. The caller is supposed to sync result "p" to Scanner's current position. Pass false |
| // otherwise and this doesn't affect Scanner state. |
| template <bool bScan> |
| OLECHAR ReadFull(EncodedCharPtr &p, EncodedCharPtr last) |
| { |
| EncodedChar ch = (nullTerminated || p < last) ? *p++ : (p++, 0); |
| return !IsMultiUnitChar(ch) ? static_cast<OLECHAR>(ch) : ReadRest<bScan>(ch, p, last); |
| } |
| |
| OLECHAR ReadSurrogatePairUpper(EncodedCharPtr &p, EncodedCharPtr last) |
| { |
| EncodedChar ch = (nullTerminated || p < last) ? *p++ : (p++, 0); |
| Assert(IsMultiUnitChar(ch)); |
| this->m_decodeOptions |= utf8::DecodeOptions::doSecondSurrogatePair; |
| return ReadRest<true>(ch, p, last); |
| } |
| |
| static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last) { return (nullTerminated || p < last) ? static_cast<OLECHAR>(*p) : 0; } |
| |
| OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last) |
| { |
| OLECHAR result = PeekFirst(p, last); |
| if (IsMultiUnitChar(result)) |
| { |
| result = ReadFull<false>(p, last); |
| } |
| return result; |
| } |
| |
| // "bScan" indicates if this ReadRest is part of scanning. Pass true during scanning and ReadRest will update |
| // related Scanner state. The caller is supposed to sync result "p" to Scanner's current position. Pass false |
| // otherwise and this doesn't affect Scanner state. |
| template <bool bScan> |
| OLECHAR ReadRest(OLECHAR ch, EncodedCharPtr &p, EncodedCharPtr last) |
| { |
| EncodedCharPtr s; |
| if (bScan) |
| { |
| s = p; |
| } |
| OLECHAR result = utf8::DecodeTail(ch, p, last, m_decodeOptions); |
| if (bScan) |
| { |
| // If we are scanning, update m_cMultiUnits counter. |
| m_cMultiUnits += p - s; |
| } |
| return result; |
| } |
| void RestoreMultiUnits(size_t multiUnits) { m_cMultiUnits = multiUnits; } |
| |
| size_t CharacterOffsetToUnitOffset(EncodedCharPtr start, EncodedCharPtr current, EncodedCharPtr last, charcount_t offset) |
| { |
| // Note: current may be before or after last. If last is the null terminator, current should be within [start, last]. |
| // But if we excluded HTMLCommentSuffix for the source, last is before "// -->\0". Scanner may stop at null |
| // terminator past last, then current is after last. |
| Assert(current >= start); |
| size_t currentUnitOffset = current - start; |
| Assert(currentUnitOffset > m_cMultiUnits); |
| Assert(currentUnitOffset - m_cMultiUnits < LONG_MAX); |
| charcount_t currentCharacterOffset = charcount_t(currentUnitOffset - m_cMultiUnits); |
| |
| // If the offset is the current character offset then just return the current unit offset. |
| if (currentCharacterOffset == offset) return currentUnitOffset; |
| |
| // If we have not encountered any multi-unit characters and we are moving backward the |
| // character index and unit index are 1:1 so just return offset |
| if (m_cMultiUnits == 0 && offset <= currentCharacterOffset) return offset; |
| |
| // Use local decode options |
| utf8::DecodeOptions decodeOptions = IsUtf8() ? utf8::doDefault : utf8::doAllowThreeByteSurrogates; |
| |
| if (offset > currentCharacterOffset) |
| { |
| // If we are looking for an offset past current, current must be within [start, last]. We don't expect seeking |
| // scanner position past last. |
| Assert(current <= last); |
| |
| // If offset > currentOffset we already know the current character offset. The unit offset is the |
| // unit index of offset - currentOffset characters from current. |
| charcount_t charsLeft = offset - currentCharacterOffset; |
| return currentUnitOffset + utf8::CharacterIndexToByteIndex(current, last - current, charsLeft, decodeOptions); |
| } |
| |
| // If all else fails calculate the index from the start of the buffer. |
| return utf8::CharacterIndexToByteIndex(start, currentUnitOffset, offset, decodeOptions); |
| } |
| |
| void ConvertToUnicode(__out_ecount_full(cch) LPOLESTR pch, charcount_t cch, EncodedCharPtr start, EncodedCharPtr end) |
| { |
| m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doSecondSurrogatePair); |
| utf8::DecodeUnitsInto(pch, start, end, m_decodeOptions); |
| } |
| |
| public: |
| void Clear() |
| { |
| m_cMultiUnits = 0; |
| m_decodeOptions = utf8::doAllowThreeByteSurrogates; |
| } |
| |
| // If we get UTF8 source buffer, turn off doAllowThreeByteSurrogates but allow invalid WCHARs without replacing them with replacement 'g_chUnknown'. |
| void SetIsUtf8(bool isUtf8) |
| { |
| if (isUtf8) |
| { |
| m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doAllowThreeByteSurrogates | utf8::doAllowInvalidWCHARs); |
| } |
| else |
| { |
| m_decodeOptions = (utf8::DecodeOptions)(m_decodeOptions & ~utf8::doAllowInvalidWCHARs | utf8::doAllowThreeByteSurrogates); |
| } |
| } |
| bool IsUtf8() const { return (m_decodeOptions & utf8::doAllowThreeByteSurrogates) == 0; } |
| }; |
| |
| typedef UTF8EncodingPolicyBase<false> NotNullTerminatedUTF8EncodingPolicy; |
| |
| interface IScanner |
| { |
| virtual void GetErrorLineInfo(__out int32& ichMin, __out int32& ichLim, __out int32& line, __out int32& ichMinLine) = 0; |
| virtual HRESULT SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine) = 0; |
| }; |
| |
| // Flags that can be provided to the Scan functions. |
| // These can be bitwise OR'ed. |
| enum ScanFlag |
| { |
| ScanFlagNone = 0, |
| ScanFlagSuppressStrPid = 1, // Force strings to always have pid |
| }; |
| |
| typedef HRESULT (*CommentCallback)(void *data, OLECHAR firstChar, OLECHAR secondChar, bool containTypeDef, charcount_t min, charcount_t lim, bool adjacent, bool multiline, charcount_t startLine, charcount_t endLine); |
| |
| // Restore point defined using a relative offset rather than a pointer. |
| struct RestorePoint |
| { |
| Field(charcount_t) m_ichMinTok; |
| Field(charcount_t) m_ichMinLine; |
| Field(size_t) m_cMinTokMultiUnits; |
| Field(size_t) m_cMinLineMultiUnits; |
| Field(charcount_t) m_line; |
| Field(uint) functionIdIncrement; |
| Field(size_t) lengthDecr; |
| Field(BOOL) m_fHadEol; |
| |
| #ifdef DEBUG |
| Field(size_t) m_cMultiUnits; |
| #endif |
| |
| RestorePoint() |
| : m_ichMinTok((charcount_t)-1), |
| m_ichMinLine((charcount_t)-1), |
| m_cMinTokMultiUnits((size_t)-1), |
| m_cMinLineMultiUnits((size_t)-1), |
| m_line((charcount_t)-1), |
| functionIdIncrement(0), |
| lengthDecr(0), |
| m_fHadEol(FALSE) |
| #ifdef DEBUG |
| , m_cMultiUnits((size_t)-1) |
| #endif |
| { |
| }; |
| }; |
| |
| template <typename EncodingPolicy> |
| class Scanner : public IScanner, public EncodingPolicy |
| { |
| friend Parser; |
| typedef typename EncodingPolicy::EncodedChar EncodedChar; |
| typedef typename EncodingPolicy::EncodedCharPtr EncodedCharPtr; |
| |
| public: |
| Scanner(Parser* parser, Token *ptoken, Js::ScriptContext *scriptContext); |
| ~Scanner(void); |
| |
| tokens Scan(); |
| tokens ScanNoKeywords(); |
| tokens ScanForcingPid(); |
| void SetText(EncodedCharPtr psz, size_t offset, size_t length, charcount_t characterOffset, bool isUtf8, ULONG grfscr, ULONG lineNumber = 0); |
| #if ENABLE_BACKGROUND_PARSING |
| void PrepareForBackgroundParse(Js::ScriptContext *scriptContext); |
| #endif |
| enum ScanState |
| { |
| ScanStateNormal = 0, |
| ScanStateStringTemplateMiddleOrEnd = 1, |
| }; |
| |
| ScanState GetScanState() { return m_scanState; } |
| void SetScanState(ScanState state) { m_scanState = state; } |
| |
| bool SetYieldIsKeywordRegion(bool fYieldIsKeywordRegion) |
| { |
| bool fPrevYieldIsKeywordRegion = m_fYieldIsKeywordRegion; |
| m_fYieldIsKeywordRegion = fYieldIsKeywordRegion; |
| return fPrevYieldIsKeywordRegion; |
| } |
| bool YieldIsKeywordRegion() |
| { |
| return m_fYieldIsKeywordRegion; |
| } |
| bool YieldIsKeyword() |
| { |
| return YieldIsKeywordRegion() || this->IsStrictMode(); |
| } |
| |
| bool SetAwaitIsKeywordRegion(bool fAwaitIsKeywordRegion) |
| { |
| bool fPrevAwaitIsKeywordRegion = m_fAwaitIsKeywordRegion; |
| m_fAwaitIsKeywordRegion = fAwaitIsKeywordRegion; |
| return fPrevAwaitIsKeywordRegion; |
| } |
| bool AwaitIsKeywordRegion() |
| { |
| return m_fAwaitIsKeywordRegion; |
| } |
| |
| bool AwaitIsKeyword() |
| { |
| return AwaitIsKeywordRegion() || this->m_fIsModuleCode; |
| } |
| |
| tokens TryRescanRegExp(); |
| tokens RescanRegExp(); |
| tokens RescanRegExpNoAST(); |
| tokens RescanRegExpTokenizer(); |
| |
| BOOL FHadNewLine(void) |
| { |
| return m_fHadEol; |
| } |
| IdentPtr PidFromLong(int32 lw); |
| IdentPtr PidFromDbl(double dbl); |
| |
| LPCOLESTR StringFromLong(int32 lw); |
| LPCOLESTR StringFromDbl(double dbl); |
| |
| IdentPtr GetSecondaryBufferAsPid(); |
| |
| BYTE SetDeferredParse(BOOL defer) |
| { |
| BYTE fOld = m_DeferredParseFlags; |
| if (defer) |
| { |
| m_DeferredParseFlags |= ScanFlagSuppressStrPid; |
| } |
| else |
| { |
| m_DeferredParseFlags = ScanFlagNone; |
| } |
| return fOld; |
| } |
| |
| void SetDeferredParseFlags(BYTE flags) |
| { |
| m_DeferredParseFlags = flags; |
| } |
| |
| // the functions IsDoubleQuoteOnLastTkStrCon() and IsHexOrOctOnLastTKNumber() works only with a scanner without lookahead |
| // Both functions are used to get more info on the last token for specific diffs necessary for JSON parsing. |
| |
| |
| //Single quotes are not legal in JSON strings. Make distinction between single quote string constant and single quote string |
| BOOL IsDoubleQuoteOnLastTkStrCon() |
| { |
| return m_doubleQuoteOnLastTkStrCon; |
| } |
| |
| // True if all chars of last string constant are ascii |
| BOOL IsEscapeOnLastTkStrCon() |
| { |
| return m_EscapeOnLastTkStrCon; |
| } |
| |
| |
| bool IsOctOrLeadingZeroOnLastTKNumber() |
| { |
| return m_OctOrLeadingZeroOnLastTKNumber; |
| } |
| |
| // Returns the character offset of the first token. The character offset is the offset the first character of the token would |
| // have if the entire file was converted to Unicode (UTF16-LE). |
| charcount_t IchMinTok(void) const |
| { |
| Assert(m_pchMinTok - m_pchBase >= 0); |
| Assert(m_pchMinTok - m_pchBase <= LONG_MAX); |
| Assert(static_cast<charcount_t>(m_pchMinTok - m_pchBase) >= m_cMinTokMultiUnits); |
| return static_cast<charcount_t>(m_pchMinTok - m_pchBase - m_cMinTokMultiUnits); |
| } |
| |
| // Returns the character offset of the character immediately following the token. The character offset is the offset the first |
| // character of the token would have if the entire file was converted to Unicode (UTF16-LE). |
| charcount_t IchLimTok(void) const |
| { |
| |
| Assert(m_currentCharacter - m_pchBase >= 0); |
| Assert(m_currentCharacter - m_pchBase <= LONG_MAX); |
| Assert(static_cast<charcount_t>(m_currentCharacter - m_pchBase) >= this->m_cMultiUnits); |
| return static_cast<charcount_t>(m_currentCharacter - m_pchBase - this->m_cMultiUnits); |
| } |
| |
| void SetErrorPosition(charcount_t ichMinError, charcount_t ichLimError) |
| { |
| Assert(ichLimError > 0 || ichMinError == 0); |
| m_ichMinError = ichMinError; |
| m_ichLimError = ichLimError; |
| } |
| |
| charcount_t IchMinError(void) const |
| { |
| return m_ichLimError ? m_ichMinError : IchMinTok(); |
| } |
| |
| charcount_t IchLimError(void) const |
| { |
| return m_ichLimError ? m_ichLimError : IchLimTok(); |
| } |
| |
| // Returns the encoded unit offset of first character of the token. For example, in a UTF-8 encoding this is the offset into |
| // the UTF-8 buffer. In Unicode this is the same as IchMinTok(). |
| size_t IecpMinTok(void) const |
| { |
| return static_cast< size_t >(m_pchMinTok - m_pchBase); |
| } |
| |
| // Returns the encoded unit offset of the character immediately following the token. For example, in a UTF-8 encoding this is |
| // the offset into the UTF-8 buffer. In Unicode this is the same as IchLimTok(). |
| size_t IecpLimTok(void) const |
| { |
| return static_cast< size_t >(m_currentCharacter - m_pchBase); |
| } |
| |
| size_t IecpLimTokPrevious() const |
| { |
| AssertMsg(m_iecpLimTokPrevious != (size_t)-1, "IecpLimTokPrevious() cannot be called before scanning a token"); |
| return m_iecpLimTokPrevious; |
| } |
| |
| charcount_t IchLimTokPrevious() const |
| { |
| AssertMsg(m_ichLimTokPrevious != (charcount_t)-1, "IchLimTokPrevious() cannot be called before scanning a token"); |
| return m_ichLimTokPrevious; |
| } |
| |
| IdentPtr PidAt(size_t iecpMin, size_t iecpLim); |
| |
| // Returns the character offset within the stream of the first character on the current line. |
| charcount_t IchMinLine(void) const |
| { |
| Assert(m_pchMinLine - m_pchBase >= 0); |
| Assert(m_pchMinLine - m_pchBase <= LONG_MAX); |
| Assert(static_cast<charcount_t>(m_pchMinLine - m_pchBase) >= m_cMinLineMultiUnits); |
| return static_cast<charcount_t>(m_pchMinLine - m_pchBase - m_cMinLineMultiUnits); |
| } |
| |
| // Returns the current line number |
| charcount_t LineCur(void) const { return m_line; } |
| |
| void SetCurrentCharacter(charcount_t offset, ULONG lineNumber = 0) |
| { |
| DebugOnly(m_iecpLimTokPrevious = (size_t)-1); |
| DebugOnly(m_ichLimTokPrevious = (charcount_t)-1); |
| size_t length = m_pchLast - m_pchBase; |
| if (offset > length) offset = static_cast< charcount_t >(length); |
| size_t ibOffset = this->CharacterOffsetToUnitOffset(m_pchBase, m_currentCharacter, m_pchLast, offset); |
| m_currentCharacter = m_pchBase + ibOffset; |
| Assert(ibOffset >= offset); |
| this->RestoreMultiUnits(ibOffset - offset); |
| m_line = lineNumber; |
| } |
| |
| // IScanner methods |
| virtual void GetErrorLineInfo(__out int32& ichMin, __out int32& ichLim, __out int32& line, __out int32& ichMinLine) |
| { |
| ichMin = this->IchMinError(); |
| ichLim = this->IchLimError(); |
| line = this->LineCur(); |
| ichMinLine = this->IchMinLine(); |
| if (m_ichLimError && m_ichMinError < (charcount_t)ichMinLine) |
| { |
| line = m_startLine; |
| ichMinLine = UpdateLine(line, m_pchStartLine, m_pchLast, 0, ichMin); |
| } |
| } |
| |
| virtual HRESULT SysAllocErrorLine(int32 ichMinLine, __out BSTR* pbstrLine); |
| |
| class TemporaryBuffer |
| { |
| friend Scanner<EncodingPolicy>; |
| |
| private: |
| // Keep a reference to the scanner. |
| // We will use it to signal an error if we fail to allocate the buffer. |
| Scanner<EncodingPolicy>* m_pscanner; |
| uint32 m_cchMax; |
| uint32 m_ichCur; |
| __field_ecount(m_cchMax) OLECHAR *m_prgch; |
| byte m_rgbInit[256]; |
| |
| public: |
| TemporaryBuffer() |
| { |
| m_pscanner = nullptr; |
| m_prgch = (OLECHAR*)m_rgbInit; |
| m_cchMax = _countof(m_rgbInit) / sizeof(OLECHAR); |
| m_ichCur = 0; |
| } |
| |
| ~TemporaryBuffer() |
| { |
| if (m_prgch != (OLECHAR*)m_rgbInit) |
| { |
| free(m_prgch); |
| } |
| } |
| |
| void Reset() |
| { |
| m_ichCur = 0; |
| } |
| |
| void Clear() |
| { |
| if (m_prgch != (OLECHAR*)m_rgbInit) |
| { |
| free(m_prgch); |
| m_prgch = (OLECHAR*)m_rgbInit; |
| m_cchMax = _countof(m_rgbInit) / sizeof(OLECHAR); |
| } |
| Reset(); |
| } |
| |
| void AppendCh(uint ch) |
| { |
| return AppendCh<true>(ch); |
| } |
| |
| template<bool performAppend> void AppendCh(uint ch) |
| { |
| if (performAppend) |
| { |
| if (m_ichCur >= m_cchMax) |
| { |
| Grow(); |
| } |
| |
| Assert(m_ichCur < m_cchMax); |
| __analysis_assume(m_ichCur < m_cchMax); |
| |
| m_prgch[m_ichCur++] = static_cast<OLECHAR>(ch); |
| } |
| } |
| |
| private: |
| void Grow() |
| { |
| Assert(m_pscanner != nullptr); |
| byte *prgbNew; |
| byte *prgbOld = (byte *)m_prgch; |
| |
| ULONG cbNew; |
| if (FAILED(ULongMult(m_cchMax, sizeof(OLECHAR) * 2, &cbNew))) |
| { |
| m_pscanner->Error(ERRnoMemory); |
| } |
| |
| if (prgbOld == m_rgbInit) |
| { |
| if (nullptr == (prgbNew = static_cast<byte*>(malloc(cbNew)))) |
| m_pscanner->Error(ERRnoMemory); |
| js_memcpy_s(prgbNew, cbNew, prgbOld, m_ichCur * sizeof(OLECHAR)); |
| } |
| else if (nullptr == (prgbNew = static_cast<byte*>(realloc(prgbOld, cbNew)))) |
| { |
| m_pscanner->Error(ERRnoMemory); |
| } |
| |
| m_prgch = (OLECHAR*)prgbNew; |
| m_cchMax = cbNew / sizeof(OLECHAR); |
| } |
| }; |
| |
| void Capture(_Out_ RestorePoint* restorePoint); |
| void SeekTo(const RestorePoint& restorePoint); |
| void SeekToForcingPid(const RestorePoint& restorePoint); |
| |
| void Capture(_Out_ RestorePoint* restorePoint, uint functionIdIncrement, size_t lengthDecr); |
| void SeekTo(const RestorePoint& restorePoint, uint *nextFunctionId); |
| |
| void Clear(); |
| |
| HashTbl * GetHashTbl() { return &m_htbl; } |
| private: |
| Parser *m_parser; |
| HashTbl m_htbl; |
| Token *m_ptoken; |
| EncodedCharPtr m_pchBase; // beginning of source |
| EncodedCharPtr m_pchLast; // The end of source |
| EncodedCharPtr m_pchMinLine; // beginning of current line |
| EncodedCharPtr m_pchMinTok; // beginning of current token |
| EncodedCharPtr m_currentCharacter; // current character |
| EncodedCharPtr m_pchPrevLine; // beginning of previous line |
| size_t m_cMinTokMultiUnits; // number of multi-unit characters previous to m_pchMinTok |
| size_t m_cMinLineMultiUnits; // number of multi-unit characters previous to m_pchMinLine |
| uint16 m_fStringTemplateDepth; // we should treat } as string template middle starting character (depth instead of flag) |
| BOOL m_fHadEol; |
| BOOL m_fIsModuleCode : 1; |
| BOOL m_doubleQuoteOnLastTkStrCon :1; |
| bool m_OctOrLeadingZeroOnLastTKNumber :1; |
| bool m_EscapeOnLastTkStrCon:1; |
| BOOL m_fNextStringTemplateIsTagged:1; // the next string template scanned has a tag (must create raw strings) |
| BYTE m_DeferredParseFlags:2; // suppressStrPid and suppressIdPid |
| bool es6UnicodeMode; // True if ES6Unicode Extensions are enabled. |
| bool m_fYieldIsKeywordRegion; // Whether to treat 'yield' as an identifier or keyword |
| bool m_fAwaitIsKeywordRegion; // Whether to treat 'await' as an identifier or keyword |
| |
| // Temporary buffer. |
| TemporaryBuffer m_tempChBuf; |
| TemporaryBuffer m_tempChBufSecondary; |
| |
| charcount_t m_line; |
| ScanState m_scanState; |
| |
| charcount_t m_ichMinError; |
| charcount_t m_ichLimError; |
| charcount_t m_startLine; |
| EncodedCharPtr m_pchStartLine; |
| |
| Js::ScriptContext* m_scriptContext; |
| const Js::CharClassifier *charClassifier; |
| |
| tokens m_tkPrevious; |
| size_t m_iecpLimTokPrevious; |
| charcount_t m_ichLimTokPrevious; |
| |
| void ClearStates(); |
| |
| template <bool forcePid> |
| void SeekAndScan(const RestorePoint& restorePoint); |
| |
| tokens ScanCore(bool identifyKwds); |
| tokens ScanAhead(); |
| |
| tokens ScanError(EncodedCharPtr pchCur, tokens errorToken) |
| { |
| m_currentCharacter = pchCur; |
| return m_ptoken->tk = tkScanError; |
| } |
| |
| __declspec(noreturn) void Error(HRESULT hr) |
| { |
| m_pchMinTok = m_currentCharacter; |
| m_cMinTokMultiUnits = this->m_cMultiUnits; |
| throw ParseExceptionObject(hr); |
| } |
| |
| const EncodedCharPtr PchBase(void) const |
| { |
| return m_pchBase; |
| } |
| const EncodedCharPtr PchMinTok(void) |
| { |
| return m_pchMinTok; |
| } |
| |
| template<bool stringTemplateMode, bool createRawString> tokens ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp); |
| tokens ScanStringConstant(OLECHAR delim, EncodedCharPtr *pp); |
| |
| tokens ScanStringTemplateBegin(EncodedCharPtr *pp); |
| tokens ScanStringTemplateMiddleOrEnd(EncodedCharPtr *pp); |
| |
| void ScanNewLine(uint ch); |
| void NotifyScannedNewLine(); |
| charcount_t LineLength(EncodedCharPtr first, EncodedCharPtr last, size_t* cb); |
| |
| tokens ScanIdentifier(bool identifyKwds, EncodedCharPtr *pp); |
| BOOL FastIdentifierContinue(EncodedCharPtr&p, EncodedCharPtr last); |
| tokens ScanIdentifierContinue(bool identifyKwds, bool fHasEscape, bool fHasMultiChar, EncodedCharPtr pchMin, EncodedCharPtr p, EncodedCharPtr *pp); |
| tokens SkipComment(EncodedCharPtr *pp, /* out */ bool* containTypeDef); |
| tokens ScanRegExpConstant(ArenaAllocator* alloc); |
| tokens ScanRegExpConstantNoAST(ArenaAllocator* alloc); |
| EncodedCharPtr FScanNumber(EncodedCharPtr p, double *pdbl, bool& likelyInt); |
| IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last, bool fHadEscape, bool fHasMultiChar); |
| IdentPtr PidOfIdentiferAt(EncodedCharPtr p, EncodedCharPtr last); |
| uint32 UnescapeToTempBuf(EncodedCharPtr p, EncodedCharPtr last); |
| |
| void SaveSrcPos(void) |
| { |
| m_pchMinTok = m_currentCharacter; |
| } |
| OLECHAR PeekNextChar(void) |
| { |
| return this->PeekFull(m_currentCharacter, m_pchLast); |
| } |
| OLECHAR ReadNextChar(void) |
| { |
| return this->template ReadFull<true>(m_currentCharacter, m_pchLast); |
| } |
| |
| EncodedCharPtr AdjustedLast() const |
| { |
| return m_pchLast; |
| } |
| |
| size_t AdjustedLength() const |
| { |
| return AdjustedLast() - m_pchBase; |
| } |
| |
| bool IsStrictMode() const |
| { |
| return this->m_parser != NULL && this->m_parser->IsStrictMode(); |
| } |
| |
| // This function expects the first character to be a 'u' |
| // It will attempt to return a codepoint represented by a single escape point (either of the form \uXXXX or \u{any number of hex characters, s.t. value < 0x110000} |
| bool TryReadEscape(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar = nullptr); |
| |
| template <bool bScan> |
| bool TryReadCodePointRest(codepoint_t lower, EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *outContainsMultiUnitChar); |
| |
| template <bool bScan> |
| inline bool TryReadCodePoint(EncodedCharPtr &startingLocation, EncodedCharPtr endOfSource, codepoint_t *outChar, bool *hasEscape, bool *outContainsMultiUnitChar); |
| |
| inline BOOL IsIdContinueNext(EncodedCharPtr startingLocation, EncodedCharPtr endOfSource) |
| { |
| codepoint_t nextCodepoint; |
| bool ignore; |
| |
| if (TryReadCodePoint<false>(startingLocation, endOfSource, &nextCodepoint, &ignore, &ignore)) |
| { |
| return charClassifier->IsIdContinue(nextCodepoint); |
| } |
| |
| return false; |
| } |
| |
| charcount_t UpdateLine(int32 &line, EncodedCharPtr start, EncodedCharPtr last, charcount_t ichStart, charcount_t ichEnd); |
| }; |