| //------------------------------------------------------------------------------------------------------- |
| // Copyright (C) Microsoft. All rights reserved. |
| // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. |
| //------------------------------------------------------------------------------------------------------- |
| #include "RuntimeLibraryPch.h" |
| #include "JSONScanner.h" |
| |
| using namespace Js; |
| |
| namespace JSON |
| { |
| // -------- Scanner implementation ------------// |
| JSONScanner::JSONScanner() |
| : inputText(0), inputLen(0), pToken(0), stringBuffer(0), allocator(0), allocatorObject(0), |
| currentRangeCharacterPairList(0), stringBufferLength(0), currentIndex(0) |
| { |
| } |
| |
| void JSONScanner::Finalizer() |
| { |
| // All dynamic memory allocated by this object is on the arena - either the one this object owns or by the |
| // one shared with JSON parser - here we will deallocate ours. The others will be deallocated when JSONParser |
| // goes away which should happen right after this. |
| if (this->allocatorObject != nullptr) |
| { |
| // We created our own allocator, so we have to free it |
| this->scriptContext->ReleaseTemporaryGuestAllocator(allocatorObject); |
| } |
| } |
| |
| void JSONScanner::Init(const char16* input, uint len, Token* pOutToken, Js::ScriptContext* sc, const char16* current, ArenaAllocator* allocator) |
| { |
| // Note that allocator could be nullptr from JSONParser, if we could not reuse an allocator, keep our own |
| inputText = input; |
| currentChar = current; |
| inputLen = len; |
| pToken = pOutToken; |
| scriptContext = sc; |
| this->allocator = allocator; |
| } |
| |
| tokens JSONScanner::Scan() |
| { |
| pTokenString = currentChar; |
| |
| while (currentChar < inputText + inputLen) |
| { |
| switch(ReadNextChar()) |
| { |
| case 0: |
| //EOF |
| currentChar--; |
| return (pToken->tk = tkEOF); |
| |
| case '\t': |
| case '\r': |
| case '\n': |
| case ' ': |
| //WS - keep looping |
| break; |
| |
| case '"': |
| //check for string |
| return ScanString(); |
| |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| //decimal digit starts a number |
| { |
| currentChar--; |
| |
| // we use StrToDbl() here for compat with the rest of the engine. StrToDbl() accept a larger syntax. |
| // Verify first the JSON grammar. |
| const char16* saveCurrentChar = currentChar; |
| if(!IsJSONNumber()) |
| { |
| ThrowSyntaxError(JSERR_JsonBadNumber); |
| } |
| currentChar = saveCurrentChar; |
| double val; |
| const char16* end; |
| val = Js::NumberUtilities::StrToDbl(currentChar, &end, scriptContext); |
| if(currentChar == end) |
| { |
| ThrowSyntaxError(JSERR_JsonBadNumber); |
| } |
| AssertMsg(!Js::JavascriptNumber::IsNan(val), "Bad result from string to double conversion"); |
| pToken->tk = tkFltCon; |
| pToken->SetDouble(val, false); |
| currentChar = end; |
| return tkFltCon; |
| } |
| |
| case ',': |
| return (pToken->tk = tkComma); |
| |
| case ':': |
| return (pToken->tk = tkColon); |
| |
| case '[': |
| return (pToken->tk = tkLBrack); |
| |
| case ']': |
| return (pToken->tk = tkRBrack); |
| |
| case '-': |
| return (pToken->tk = tkSub); |
| |
| case 'n': |
| //check for 'null' |
| if (currentChar + 2 < inputText + inputLen && currentChar[0] == 'u' && currentChar[1] == 'l' && currentChar[2] == 'l') |
| { |
| currentChar += 3; |
| return (pToken->tk = tkNULL); |
| } |
| ThrowSyntaxError(JSERR_JsonIllegalChar); |
| |
| case 't': |
| //check for 'true' |
| if (currentChar + 2 < inputText + inputLen && currentChar[0] == 'r' && currentChar[1] == 'u' && currentChar[2] == 'e') |
| { |
| currentChar += 3; |
| return (pToken->tk = tkTRUE); |
| } |
| ThrowSyntaxError(JSERR_JsonIllegalChar); |
| |
| case 'f': |
| //check for 'false' |
| if (currentChar + 3 < inputText + inputLen && currentChar[0] == 'a' && currentChar[1] == 'l' && currentChar[2] == 's' && currentChar[3] == 'e') |
| { |
| currentChar += 4; |
| return (pToken->tk = tkFALSE); |
| } |
| ThrowSyntaxError(JSERR_JsonIllegalChar); |
| |
| case '{': |
| return (pToken->tk = tkLCurly); |
| |
| case '}': |
| return (pToken->tk = tkRCurly); |
| |
| default: |
| ThrowSyntaxError(JSERR_JsonIllegalChar); |
| } |
| |
| } |
| |
| return (pToken->tk = tkEOF); |
| } |
| |
| bool JSONScanner::IsJSONNumber() |
| { |
| bool firstDigitIsAZero = false; |
| if (PeekNextChar() == '0') |
| { |
| firstDigitIsAZero = true; |
| currentChar++; |
| } |
| |
| //partial verification of number JSON grammar. |
| while (currentChar < inputText + inputLen) |
| { |
| switch(ReadNextChar()) |
| { |
| case 0: |
| return false; |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| if (firstDigitIsAZero) |
| { |
| return false; |
| } |
| break; |
| |
| case '.': |
| { |
| // at least one digit after '.' |
| if(currentChar < inputText + inputLen) |
| { |
| char16 nch = ReadNextChar(); |
| if('0' <= nch && nch <= '9') |
| { |
| return true; |
| } |
| else |
| { |
| return false; |
| } |
| } |
| else |
| { |
| return false; |
| } |
| } |
| //case 'E': |
| //case 'e': |
| // return true; |
| default: |
| return true; |
| } |
| |
| firstDigitIsAZero = false; |
| } |
| return true; |
| } |
| |
| tokens JSONScanner::ScanString() |
| { |
| char16 ch; |
| |
| this->currentIndex = 0; |
| this->currentString = const_cast<char16*>(currentChar); |
| bool endFound = false; |
| bool isStringDirectInputTextMapped = true; |
| LPCWSTR bulkStart = currentChar; |
| uint bulkLength = 0; |
| |
| while (currentChar < inputText + inputLen) |
| { |
| ch = ReadNextChar(); |
| int tempHex; |
| |
| if (ch == '"') |
| { |
| //end of the string |
| endFound = true; |
| break; |
| } |
| else if (ch <= 0x1F) |
| { |
| //JSON doesn't accept \u0000 - \u001f range, LS(\u2028) and PS(\u2029) are ok |
| ThrowSyntaxError(JSERR_JsonIllegalChar); |
| } |
| else if ( 0 == ch ) |
| { |
| currentChar--; |
| ThrowSyntaxError(JSERR_JsonNoStrEnd); |
| } |
| else if ('\\' == ch) |
| { |
| //JSON escape sequence in a string \", \/, \\, \b, \f, \n, \r, \t, unicode seq |
| // unlikely V5.8 regular chars are not escaped, i.e '\g'' in a string is illegal not 'g' |
| if (currentChar >= inputText + inputLen ) |
| { |
| ThrowSyntaxError(JSERR_JsonNoStrEnd); |
| } |
| |
| ch = ReadNextChar(); |
| switch (ch) |
| { |
| case 0: |
| currentChar--; |
| ThrowSyntaxError(JSERR_JsonNoStrEnd); |
| |
| case '"': |
| case '/': |
| case '\\': |
| //keep ch |
| break; |
| |
| case 'b': |
| ch = 0x08; |
| break; |
| |
| case 'f': |
| ch = 0x0C; |
| break; |
| |
| case 'n': |
| ch = 0x0A; |
| break; |
| |
| case 'r': |
| ch = 0x0D; |
| break; |
| |
| case 't': |
| ch = 0x09; |
| break; |
| |
| case 'u': |
| { |
| int chcode; |
| // 4 hex digits |
| if (currentChar + 3 >= inputText + inputLen) |
| { |
| //no room left for 4 hex chars |
| ThrowSyntaxError(JSERR_JsonNoStrEnd); |
| |
| } |
| if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex)) |
| { |
| ThrowSyntaxError(JSERR_JsonBadHexDigit); |
| } |
| chcode = tempHex * 0x1000; |
| |
| if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex)) |
| { |
| ThrowSyntaxError(JSERR_JsonBadHexDigit); |
| } |
| chcode += tempHex * 0x0100; |
| |
| if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex)) |
| { |
| ThrowSyntaxError(JSERR_JsonBadHexDigit); |
| } |
| chcode += tempHex * 0x0010; |
| |
| if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex)) |
| { |
| ThrowSyntaxError(JSERR_JsonBadHexDigit); |
| } |
| chcode += tempHex; |
| AssertMsg(chcode == (chcode & 0xFFFF), "Bad unicode code"); |
| ch = (char16)chcode; |
| } |
| break; |
| |
| default: |
| // Any other '\o' is an error in JSON |
| ThrowSyntaxError(JSERR_JsonIllegalChar); |
| } |
| |
| // flush |
| this->GetCurrentRangeCharacterPairList()->Add(RangeCharacterPair((uint)(bulkStart - inputText), bulkLength, ch)); |
| |
| uint oldIndex = currentIndex; |
| currentIndex += bulkLength; |
| currentIndex++; |
| |
| if (currentIndex < oldIndex) |
| { |
| // Overflow |
| Js::Throw::OutOfMemory(); |
| } |
| |
| // mark the mode as 'string transformed' (no direct mapping in inputText possible) |
| isStringDirectInputTextMapped = false; |
| |
| // reset (to next char) |
| bulkStart = currentChar; |
| bulkLength = 0; |
| } |
| else |
| { |
| // continue |
| bulkLength++; |
| } |
| } |
| |
| if (!endFound) |
| { |
| // no ending '"' found |
| ThrowSyntaxError(JSERR_JsonNoStrEnd); |
| } |
| |
| if (isStringDirectInputTextMapped == false) |
| { |
| // If the last bulk is not ended with an escape character, make sure that is |
| // not built into the final unescaped string |
| bool shouldSkipLastCharacter = false; |
| if (bulkLength > 0) |
| { |
| shouldSkipLastCharacter = true; |
| this->GetCurrentRangeCharacterPairList()->Add(RangeCharacterPair((uint)(bulkStart - inputText), bulkLength, _u('\0'))); |
| uint oldIndex = currentIndex; |
| currentIndex += bulkLength; |
| if (currentIndex < oldIndex) |
| { |
| // Overflow |
| Js::Throw::OutOfMemory(); |
| } |
| } |
| |
| this->BuildUnescapedString(shouldSkipLastCharacter); |
| this->GetCurrentRangeCharacterPairList()->Clear(); |
| this->currentString = this->stringBuffer; |
| } |
| else |
| { |
| // make currentIndex the length (w/o the \0) |
| currentIndex = bulkLength; |
| |
| OUTPUT_TRACE_DEBUGONLY(Js::JSONPhase, _u("ScanString(): direct-mapped string as '%.*s'\n"), |
| GetCurrentStringLen(), GetCurrentString()); |
| } |
| |
| return (pToken->tk = tkStrCon); |
| } |
| |
| void JSONScanner::BuildUnescapedString(bool shouldSkipLastCharacter) |
| { |
| AssertMsg(this->allocator != nullptr, "We must have built the allocator"); |
| AssertMsg(this->currentRangeCharacterPairList != nullptr, "We must have built the currentRangeCharacterPairList"); |
| AssertMsg(this->currentRangeCharacterPairList->Count() > 0, "We need to build the current string only because we have escaped characters"); |
| |
| // Step 1: Ensure the buffer has sufficient space |
| int requiredSize = this->GetCurrentStringLen(); |
| if (requiredSize > this->stringBufferLength) |
| { |
| if (this->stringBuffer) |
| { |
| AdeleteArray(this->allocator, this->stringBufferLength, this->stringBuffer); |
| this->stringBuffer = nullptr; |
| } |
| |
| this->stringBuffer = AnewArray(this->allocator, char16, requiredSize); |
| this->stringBufferLength = requiredSize; |
| } |
| |
| // Step 2: Copy the data to the buffer |
| int totalCopied = 0; |
| char16* begin_copy = this->stringBuffer; |
| int lastCharacterIndex = this->currentRangeCharacterPairList->Count() - 1; |
| for (int i = 0; i <= lastCharacterIndex; i++) |
| { |
| RangeCharacterPair data = this->currentRangeCharacterPairList->Item(i); |
| int charactersToCopy = data.m_rangeLength; |
| js_wmemcpy_s(begin_copy, charactersToCopy, this->inputText + data.m_rangeStart, charactersToCopy); |
| begin_copy += charactersToCopy; |
| totalCopied += charactersToCopy; |
| |
| if (i == lastCharacterIndex && shouldSkipLastCharacter) |
| { |
| continue; |
| } |
| |
| *begin_copy = data.m_char; |
| begin_copy++; |
| totalCopied++; |
| } |
| |
| if (totalCopied != requiredSize) |
| { |
| OUTPUT_TRACE_DEBUGONLY(Js::JSONPhase, _u("BuildUnescapedString(): allocated size = %d != copying size %d\n"), requiredSize, totalCopied); |
| AssertMsg(totalCopied == requiredSize, "BuildUnescapedString(): The allocated size and copying size should match."); |
| } |
| |
| OUTPUT_TRACE_DEBUGONLY(Js::JSONPhase, _u("BuildUnescapedString(): unescaped string as '%.*s'\n"), GetCurrentStringLen(), this->stringBuffer); |
| } |
| |
| JSONScanner::RangeCharacterPairList* JSONScanner::GetCurrentRangeCharacterPairList(void) |
| { |
| if (this->currentRangeCharacterPairList == nullptr) |
| { |
| if (this->allocator == nullptr) |
| { |
| this->allocatorObject = this->scriptContext->GetTemporaryGuestAllocator(_u("JSONScanner")); |
| this->allocator = this->allocatorObject->GetAllocator(); |
| } |
| |
| this->currentRangeCharacterPairList = Anew(this->allocator, RangeCharacterPairList, this->allocator, 4); |
| } |
| |
| return this->currentRangeCharacterPairList; |
| } |
| } // namespace JSON |