| // Copyright 2016 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
#include "platform/json/JSONParser.h"

#include <limits>

#include "platform/Decimal.h"
#include "platform/json/JSONValues.h"
#include "platform/wtf/text/StringBuilder.h"
#include "platform/wtf/text/StringToNumber.h"
| |
| namespace blink { |
| |
| namespace { |
| |
// Largest nesting depth the parser will ever be asked to handle. ParseJSON
// uses it as the default depth and clamps caller-supplied depths to it.
constexpr int kMaxStackLimit = 1000;
| |
// Kinds of lexical token that ParseToken can report.
enum Token {
  kObjectBegin,          // '{'
  kObjectEnd,            // '}'
  kArrayBegin,           // '['
  kArrayEnd,             // ']'
  kStringLiteral,        // A double-quoted string.
  kNumber,               // A numeric literal.
  kBoolTrue,             // The keyword true.
  kBoolFalse,            // The keyword false.
  kNullToken,            // The keyword null.
  kListSeparator,        // ','
  kObjectPairSeparator,  // ':'
  kInvalidToken,         // Anything else, including running out of input.
};
| |
// Spellings of the three keyword literals that ParseToken recognizes via
// ParseConstToken.
constexpr const char* kNullString = "null";
constexpr const char* kTrueString = "true";
constexpr const char* kFalseString = "false";
| |
// Matches the NUL-terminated keyword |token| (e.g. "null") against the input
// that begins at |start|.
//
// Returns true, and stores the position just past the keyword in
// |*token_end|, only when every character of |token| is present, in order, in
// [start, end). On failure |*token_end| is left untouched.
template <typename CharType>
bool ParseConstToken(const CharType* start,
                     const CharType* end,
                     const CharType** token_end,
                     const char* token) {
  // Advance both cursors only after a successful comparison. Advancing them
  // as a side effect of the comparison itself would step |token| onto its
  // terminator when the *last* keyword character mismatches, which would make
  // input such as "nulx" look like a complete match for "null".
  for (; *token != '\0'; ++start, ++token) {
    if (start == end || *start != *token)
      return false;
  }
  *token_end = start;
  return true;
}
| |
// Consumes a run of ASCII decimal digits beginning at |start|.
//
// Fails when there is no digit at |start|, or when |can_have_leading_zeros|
// is false and a run of two or more digits begins with '0'. On success
// |*token_end| receives the position just past the last digit; on failure it
// is not written.
template <typename CharType>
bool ReadInt(const CharType* start,
             const CharType* end,
             const CharType** token_end,
             bool can_have_leading_zeros) {
  const CharType* cursor = start;
  while (cursor < end && *cursor >= '0' && *cursor <= '9')
    ++cursor;

  const auto digit_count = cursor - start;
  if (digit_count == 0)
    return false;

  // At least one digit was consumed, so |*start| is readable here.
  const bool starts_with_zero = *start == '0';
  if (digit_count > 1 && starts_with_zero && !can_have_leading_zeros)
    return false;

  *token_end = cursor;
  return true;
}
| |
// Determines the extent of a number literal that begins at |start|.
//
// The accepted shape follows RFC 4627: [minus] int [frac] [exp]. Only the
// boundaries of the literal are established here; the conversion to a numeric
// value is done by the caller. On success |*token_end| receives the position
// just past the literal.
template <typename CharType>
bool ParseNumberToken(const CharType* start,
                      const CharType* end,
                      const CharType** token_end) {
  const CharType* cursor = start;
  if (cursor == end)
    return false;

  // Optional leading minus sign.
  if (*cursor == '-')
    ++cursor;

  // Mandatory integer part; a multi-digit run may not start with '0'.
  if (!ReadInt(cursor, end, &cursor, false))
    return false;

  // Optional fraction: '.' followed by at least one digit.
  if (cursor != end && *cursor == '.') {
    ++cursor;
    if (!ReadInt(cursor, end, &cursor, true))
      return false;
  }

  // Optional exponent: 'e' or 'E', an optional sign, then at least one digit.
  // ReadInt rejects an empty range, which covers input that ends right after
  // the exponent marker or its sign.
  if (cursor != end && (*cursor == 'e' || *cursor == 'E')) {
    ++cursor;
    if (cursor != end && (*cursor == '-' || *cursor == '+'))
      ++cursor;
    if (!ReadInt(cursor, end, &cursor, true))
      return false;
  }

  *token_end = cursor;
  return true;
}
| |
// Consumes exactly |digits| hexadecimal digits (0-9, a-f, A-F) from |start|.
//
// Fails when fewer than |digits| characters remain before |end| or when any
// of them is not a hexadecimal digit. On success |*token_end| receives the
// position just past the digits; on failure it is not written.
template <typename CharType>
bool ReadHexDigits(const CharType* start,
                   const CharType* end,
                   const CharType** token_end,
                   int digits) {
  if (end - start < digits)
    return false;

  for (int remaining = digits; remaining > 0; --remaining, ++start) {
    const CharType ch = *start;
    const bool is_decimal = ch >= '0' && ch <= '9';
    const bool is_lower_hex = ch >= 'a' && ch <= 'f';
    const bool is_upper_hex = ch >= 'A' && ch <= 'F';
    if (!is_decimal && !is_lower_hex && !is_upper_hex)
      return false;
  }

  *token_end = start;
  return true;
}
| |
// Scans the body of a string literal. ParseToken calls this with |start|
// pointing just past the opening double quote.
//
// Returns true once an unescaped closing '"' is found, storing the position
// just past it in |*token_end|. Returns false if the input ends first or if a
// backslash is followed by anything other than one of the recognized escapes:
// \\ \/ \b \f \n \r \t \v \" , \x with two hex digits, or \u with four.
template <typename CharType>
bool ParseStringToken(const CharType* start,
                      const CharType* end,
                      const CharType** token_end) {
  const CharType* cursor = start;
  while (cursor < end) {
    const CharType current = *cursor++;
    if (current == '"') {
      *token_end = cursor;
      return true;
    }
    if (current != '\\')
      continue;

    // A backslash must be followed by an escape character.
    if (cursor == end)
      return false;
    const CharType escaped = *cursor++;

    // Number of hex digits the escape requires after its selector character.
    int hex_digits = 0;
    switch (escaped) {
      case 'x':
        hex_digits = 2;
        break;
      case 'u':
        hex_digits = 4;
        break;
      case '\\':
      case '/':
      case 'b':
      case 'f':
      case 'n':
      case 'r':
      case 't':
      case 'v':
      case '"':
        break;
      default:
        return false;
    }
    if (hex_digits != 0 && !ReadHexDigits(cursor, end, &cursor, hex_digits))
      return false;
  }
  // Ran out of input before the closing quote.
  return false;
}
| |
// Skips one comment that begins at |start|.
//
// Two forms are recognized: a "//" comment, which runs through the next '\n'
// or '\r' (or to the end of input), and a "/* ... */" comment, which must be
// closed before the end of input. On success |*comment_end| receives the
// position just past the comment. Returns false, without writing
// |*comment_end|, if no well-formed comment starts at |start|.
template <typename CharType>
bool SkipComment(const CharType* start,
                 const CharType* end,
                 const CharType** comment_end) {
  // Both forms need at least two characters, the first of which is '/'.
  if (start == end || *start != '/' || start + 1 >= end)
    return false;

  const CharType marker = start[1];
  const CharType* cursor = start + 2;

  if (marker == '/') {
    // Line comment: consume up to and including the first line terminator.
    while (cursor < end) {
      const CharType ch = *cursor++;
      if (ch == '\n' || ch == '\r') {
        *comment_end = cursor;
        return true;
      }
    }
    // Reaching the end of input inside a line comment is acceptable.
    *comment_end = end;
    return true;
  }

  if (marker == '*') {
    // Block comment: look for a '*' immediately followed by '/'. |previous|
    // starts out as NUL so that the '*' of the opening "/*" cannot also serve
    // as the '*' of the closing "*/".
    CharType previous = '\0';
    while (cursor < end) {
      const CharType ch = *cursor++;
      if (previous == '*' && ch == '/') {
        *comment_end = cursor;
        return true;
      }
      previous = ch;
    }
    // An unterminated block comment is an error.
    return false;
  }

  return false;
}
| |
// Advances past any mix of whitespace and comments at the front of
// [start, end) and stores the resulting position in |*whitespace_end|.
//
// Scanning stops at the first character that is neither whitespace nor the
// start of a well-formed comment; a '/' that SkipComment rejects is left in
// place for the caller to deal with.
template <typename CharType>
void SkipWhitespaceAndComments(const CharType* start,
                               const CharType* end,
                               const CharType** whitespace_end) {
  const CharType* cursor = start;
  while (cursor < end) {
    if (IsSpaceOrNewline(*cursor)) {
      ++cursor;
      continue;
    }
    const CharType* after_comment;
    if (*cursor != '/' || !SkipComment(cursor, end, &after_comment))
      break;
    cursor = after_comment;
  }
  *whitespace_end = cursor;
}
| |
| template <typename CharType> |
| Token ParseToken(const CharType* start, |
| const CharType* end, |
| const CharType** token_start, |
| const CharType** token_end) { |
| SkipWhitespaceAndComments(start, end, token_start); |
| start = *token_start; |
| |
| if (start == end) |
| return kInvalidToken; |
| |
| switch (*start) { |
| case 'n': |
| if (ParseConstToken(start, end, token_end, kNullString)) |
| return kNullToken; |
| break; |
| case 't': |
| if (ParseConstToken(start, end, token_end, kTrueString)) |
| return kBoolTrue; |
| break; |
| case 'f': |
| if (ParseConstToken(start, end, token_end, kFalseString)) |
| return kBoolFalse; |
| break; |
| case '[': |
| *token_end = start + 1; |
| return kArrayBegin; |
| case ']': |
| *token_end = start + 1; |
| return kArrayEnd; |
| case ',': |
| *token_end = start + 1; |
| return kListSeparator; |
| case '{': |
| *token_end = start + 1; |
| return kObjectBegin; |
| case '}': |
| *token_end = start + 1; |
| return kObjectEnd; |
| case ':': |
| *token_end = start + 1; |
| return kObjectPairSeparator; |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| case '-': |
| if (ParseNumberToken(start, end, token_end)) |
| return kNumber; |
| break; |
| case '"': |
| if (ParseStringToken(start + 1, end, token_end)) |
| return kStringLiteral; |
| break; |
| } |
| return kInvalidToken; |
| } |
| |
| template <typename CharType> |
| inline int HexToInt(CharType c) { |
| if ('0' <= c && c <= '9') |
| return c - '0'; |
| if ('A' <= c && c <= 'F') |
| return c - 'A' + 10; |
| if ('a' <= c && c <= 'f') |
| return c - 'a' + 10; |
| NOTREACHED(); |
| return 0; |
| } |
| |
| template <typename CharType> |
| bool DecodeString(const CharType* start, |
| const CharType* end, |
| StringBuilder* output) { |
| while (start < end) { |
| UChar c = *start++; |
| if ('\\' != c) { |
| output->Append(c); |
| continue; |
| } |
| if (start == end) |
| return false; |
| c = *start++; |
| |
| if (c == 'x') { |
| // \x is not supported. |
| return false; |
| } |
| |
| switch (c) { |
| case '"': |
| case '/': |
| case '\\': |
| break; |
| case 'b': |
| c = '\b'; |
| break; |
| case 'f': |
| c = '\f'; |
| break; |
| case 'n': |
| c = '\n'; |
| break; |
| case 'r': |
| c = '\r'; |
| break; |
| case 't': |
| c = '\t'; |
| break; |
| case 'v': |
| c = '\v'; |
| break; |
| case 'u': |
| c = (HexToInt(*start) << 12) + (HexToInt(*(start + 1)) << 8) + |
| (HexToInt(*(start + 2)) << 4) + HexToInt(*(start + 3)); |
| start += 4; |
| break; |
| default: |
| return false; |
| } |
| output->Append(c); |
| } |
| return true; |
| } |
| |
| template <typename CharType> |
| bool DecodeString(const CharType* start, const CharType* end, String* output) { |
| if (start == end) { |
| *output = ""; |
| return true; |
| } |
| if (start > end) |
| return false; |
| StringBuilder buffer; |
| buffer.ReserveCapacity(end - start); |
| if (!DecodeString(start, end, &buffer)) |
| return false; |
| *output = buffer.ToString(); |
| // Validate constructed utf16 string. |
| if (output->Utf8(kStrictUTF8Conversion).IsNull()) |
| return false; |
| return true; |
| } |
| |
| template <typename CharType> |
| std::unique_ptr<JSONValue> BuildValue(const CharType* start, |
| const CharType* end, |
| const CharType** value_token_end, |
| int max_depth) { |
| if (max_depth == 0) |
| return nullptr; |
| |
| std::unique_ptr<JSONValue> result; |
| const CharType* token_start; |
| const CharType* token_end; |
| Token token = ParseToken(start, end, &token_start, &token_end); |
| switch (token) { |
| case kInvalidToken: |
| return nullptr; |
| case kNullToken: |
| result = JSONValue::Null(); |
| break; |
| case kBoolTrue: |
| result = JSONBasicValue::Create(true); |
| break; |
| case kBoolFalse: |
| result = JSONBasicValue::Create(false); |
| break; |
| case kNumber: { |
| bool ok; |
| double value = |
| CharactersToDouble(token_start, token_end - token_start, &ok); |
| if (Decimal::FromDouble(value).IsInfinity()) |
| ok = false; |
| if (!ok) |
| return nullptr; |
| int number = static_cast<int>(value); |
| if (number == value) |
| result = JSONBasicValue::Create(number); |
| else |
| result = JSONBasicValue::Create(value); |
| break; |
| } |
| case kStringLiteral: { |
| String value; |
| bool ok = DecodeString(token_start + 1, token_end - 1, &value); |
| if (!ok) |
| return nullptr; |
| result = JSONString::Create(value); |
| break; |
| } |
| case kArrayBegin: { |
| std::unique_ptr<JSONArray> array = JSONArray::Create(); |
| start = token_end; |
| token = ParseToken(start, end, &token_start, &token_end); |
| while (token != kArrayEnd) { |
| std::unique_ptr<JSONValue> array_node = |
| BuildValue(start, end, &token_end, max_depth - 1); |
| if (!array_node) |
| return nullptr; |
| array->PushValue(std::move(array_node)); |
| |
| // After a list value, we expect a comma or the end of the list. |
| start = token_end; |
| token = ParseToken(start, end, &token_start, &token_end); |
| if (token == kListSeparator) { |
| start = token_end; |
| token = ParseToken(start, end, &token_start, &token_end); |
| if (token == kArrayEnd) |
| return nullptr; |
| } else if (token != kArrayEnd) { |
| // Unexpected value after list value. Bail out. |
| return nullptr; |
| } |
| } |
| if (token != kArrayEnd) |
| return nullptr; |
| result = std::move(array); |
| break; |
| } |
| case kObjectBegin: { |
| std::unique_ptr<JSONObject> object = JSONObject::Create(); |
| start = token_end; |
| token = ParseToken(start, end, &token_start, &token_end); |
| while (token != kObjectEnd) { |
| if (token != kStringLiteral) |
| return nullptr; |
| String key; |
| if (!DecodeString(token_start + 1, token_end - 1, &key)) |
| return nullptr; |
| start = token_end; |
| |
| token = ParseToken(start, end, &token_start, &token_end); |
| if (token != kObjectPairSeparator) |
| return nullptr; |
| start = token_end; |
| |
| std::unique_ptr<JSONValue> value = |
| BuildValue(start, end, &token_end, max_depth - 1); |
| if (!value) |
| return nullptr; |
| object->SetValue(key, std::move(value)); |
| start = token_end; |
| |
| // After a key/value pair, we expect a comma or the end of the |
| // object. |
| token = ParseToken(start, end, &token_start, &token_end); |
| if (token == kListSeparator) { |
| start = token_end; |
| token = ParseToken(start, end, &token_start, &token_end); |
| if (token == kObjectEnd) |
| return nullptr; |
| } else if (token != kObjectEnd) { |
| // Unexpected value after last object value. Bail out. |
| return nullptr; |
| } |
| } |
| if (token != kObjectEnd) |
| return nullptr; |
| result = std::move(object); |
| break; |
| } |
| |
| default: |
| // We got a token that's not a value. |
| return nullptr; |
| } |
| |
| SkipWhitespaceAndComments(token_end, end, value_token_end); |
| return result; |
| } |
| |
| template <typename CharType> |
| std::unique_ptr<JSONValue> ParseJSONInternal(const CharType* start, |
| unsigned length, |
| int max_depth) { |
| const CharType* end = start + length; |
| const CharType* token_end; |
| std::unique_ptr<JSONValue> value = |
| BuildValue(start, end, &token_end, max_depth); |
| if (!value || token_end != end) |
| return nullptr; |
| return value; |
| } |
| |
| } // anonymous namespace |
| |
| std::unique_ptr<JSONValue> ParseJSON(const String& json) { |
| return ParseJSON(json, kMaxStackLimit); |
| } |
| |
| std::unique_ptr<JSONValue> ParseJSON(const String& json, int max_depth) { |
| if (json.IsEmpty()) |
| return nullptr; |
| if (max_depth < 0) |
| max_depth = 0; |
| if (max_depth > kMaxStackLimit) |
| max_depth = kMaxStackLimit; |
| if (json.Is8Bit()) |
| return ParseJSONInternal(json.Characters8(), json.length(), max_depth); |
| return ParseJSONInternal(json.Characters16(), json.length(), max_depth); |
| } |
| |
| } // namespace blink |