blob: 77bbfa750b84b0009aee55f62940a6f3b0b8aea9 [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/platform/json/json_parser.h"
#include "base/numerics/safe_conversions.h"
#include "third_party/blink/renderer/platform/json/json_values.h"
#include "third_party/blink/renderer/platform/wtf/decimal.h"
#include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
#include "third_party/blink/renderer/platform/wtf/text/string_to_number.h"
namespace blink {
namespace {
const int kMaxStackLimit = 1000;
using Error = JSONParseErrorType;
String FormatErrorMessage(Error error, int line, int column) {
String text;
switch (error) {
case Error::kNoError:
NOTREACHED();
return "";
case Error::kUnexpectedToken:
text = "Unexpected token.";
break;
case Error::kSyntaxError:
text = "Syntax error.";
break;
case Error::kInvalidEscape:
text = "Invalid escape sequence.";
break;
case Error::kTooMuchNesting:
text = "Too much nesting.";
break;
case Error::kUnexpectedDataAfterRoot:
text = "Unexpected data after root element.";
break;
case Error::kUnsupportedEncoding:
text =
"Unsupported encoding. JSON and all string literals must contain "
"valid Unicode characters.";
break;
}
return "Line: " + String::Number(line) +
", column: " + String::Number(column) + ", " + text;
}
// Note: all parsing functions take a |cursor| parameter which is
// where they start parsing from.
// If the parsing succeeds, |cursor| will point to the position
// right after the parsed value, "consuming" some portion of the input.
// If the parsing fails, |cursor| will point to the error position.
template <typename CharType>
struct Cursor {
int line;
const CharType* line_start;
const CharType* pos;
};
enum Token {
kObjectBegin,
kObjectEnd,
kArrayBegin,
kArrayEnd,
kStringLiteral,
kNumber,
kBoolTrue,
kBoolFalse,
kNullToken,
kListSeparator,
kObjectPairSeparator,
};
template <typename CharType>
Error ParseConstToken(Cursor<CharType>* cursor,
const CharType* end,
const char* token) {
const CharType* token_start = cursor->pos;
while (cursor->pos < end && *token != '\0' && *(cursor->pos++) == *token++) {
}
if (*token != '\0') {
cursor->pos = token_start;
return Error::kSyntaxError;
}
return Error::kNoError;
}
template <typename CharType>
Error ReadInt(Cursor<CharType>* cursor,
const CharType* end,
bool can_have_leading_zeros) {
if (cursor->pos == end)
return Error::kSyntaxError;
const CharType* start_ptr = cursor->pos;
bool have_leading_zero = '0' == *(cursor->pos);
int length = 0;
while (cursor->pos < end && '0' <= *(cursor->pos) && *(cursor->pos) <= '9') {
++(cursor->pos);
++length;
}
if (!length)
return Error::kSyntaxError;
if (!can_have_leading_zeros && length > 1 && have_leading_zero) {
cursor->pos = start_ptr + 1;
return Error::kSyntaxError;
}
return Error::kNoError;
}
template <typename CharType>
Error ParseNumberToken(Cursor<CharType>* cursor, const CharType* end) {
// We just grab the number here. We validate the size in DecodeNumber.
// According to RFC4627, a valid number is: [minus] int [frac] [exp]
if (cursor->pos == end)
return Error::kSyntaxError;
if (*(cursor->pos) == '-')
++(cursor->pos);
Error error = ReadInt(cursor, end, false);
if (error != Error::kNoError)
return error;
if (cursor->pos == end)
return Error::kNoError;
// Optional fraction part
CharType c = *(cursor->pos);
if ('.' == c) {
++(cursor->pos);
error = ReadInt(cursor, end, true);
if (error != Error::kNoError)
return error;
if (cursor->pos == end)
return Error::kNoError;
c = *(cursor->pos);
}
// Optional exponent part
if ('e' == c || 'E' == c) {
++(cursor->pos);
if (cursor->pos == end)
return Error::kSyntaxError;
c = *(cursor->pos);
if ('-' == c || '+' == c) {
++(cursor->pos);
if (cursor->pos == end)
return Error::kSyntaxError;
}
error = ReadInt(cursor, end, true);
if (error != Error::kNoError)
return error;
}
return Error::kNoError;
}
template <typename CharType>
Error ReadHexDigits(Cursor<CharType>* cursor, const CharType* end, int digits) {
const CharType* token_start = cursor->pos;
if (end - cursor->pos < digits)
return Error::kInvalidEscape;
for (int i = 0; i < digits; ++i) {
CharType c = *(cursor->pos)++;
if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
('A' <= c && c <= 'F'))) {
cursor->pos = token_start;
return Error::kInvalidEscape;
}
}
return Error::kNoError;
}
template <typename CharType>
Error ParseStringToken(Cursor<CharType>* cursor, const CharType* end) {
if (cursor->pos == end)
return Error::kSyntaxError;
if (*(cursor->pos) != '"')
return Error::kSyntaxError;
++(cursor->pos);
while (cursor->pos < end) {
CharType c = *(cursor->pos)++;
if ('\\' == c) {
if (cursor->pos == end)
return Error::kInvalidEscape;
c = *(cursor->pos)++;
// Make sure the escaped char is valid.
switch (c) {
case 'x': {
Error error = ReadHexDigits(cursor, end, 2);
if (error != Error::kNoError)
return error;
break;
}
case 'u': {
Error error = ReadHexDigits(cursor, end, 4);
if (error != Error::kNoError)
return error;
break;
}
case '\\':
case '/':
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
case 'v':
case '"':
break;
default:
return Error::kInvalidEscape;
}
} else if (c < 0x20) {
return Error::kSyntaxError;
} else if ('"' == c) {
return Error::kNoError;
}
}
return Error::kSyntaxError;
}
template <typename CharType>
Error SkipComment(Cursor<CharType>* cursor, const CharType* end) {
const CharType* pos = cursor->pos;
if (pos == end)
return Error::kSyntaxError;
if (*pos != '/' || pos + 1 >= end)
return Error::kSyntaxError;
++pos;
if (*pos == '/') {
// Single line comment, read to newline.
for (++pos; pos < end; ++pos) {
if (*pos == '\n') {
cursor->line++;
cursor->pos = pos + 1;
cursor->line_start = cursor->pos;
return Error::kNoError;
}
}
cursor->pos = end;
// Comment reaches end-of-input, which is fine.
return Error::kNoError;
}
if (*pos == '*') {
CharType previous = '\0';
// Block comment, read until end marker.
for (++pos; pos < end; previous = *pos++) {
if (*pos == '\n') {
cursor->line++;
cursor->line_start = pos + 1;
}
if (previous == '*' && *pos == '/') {
cursor->pos = pos + 1;
return Error::kNoError;
}
}
// Block comment must close before end-of-input.
return Error::kSyntaxError;
}
return Error::kSyntaxError;
}
template <typename CharType>
Error SkipWhitespaceAndComments(Cursor<CharType>* cursor, const CharType* end) {
while (cursor->pos < end) {
CharType c = *(cursor->pos);
if (c == '\n') {
cursor->line++;
++(cursor->pos);
cursor->line_start = cursor->pos;
} else if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
++(cursor->pos);
} else if (c == '/') {
Error error = SkipComment(cursor, end);
if (error != Error::kNoError)
return error;
} else {
break;
}
}
return Error::kNoError;
}
template <typename CharType>
Error ParseToken(Cursor<CharType>* cursor,
const CharType* end,
Token* token,
Cursor<CharType>* token_start) {
Error error = SkipWhitespaceAndComments(cursor, end);
if (error != Error::kNoError)
return error;
*token_start = *cursor;
if (cursor->pos == end)
return Error::kSyntaxError;
switch (*(cursor->pos)) {
case 'n':
*token = kNullToken;
return ParseConstToken(cursor, end, kJSONNullString);
case 't':
*token = kBoolTrue;
return ParseConstToken(cursor, end, kJSONTrueString);
case 'f':
*token = kBoolFalse;
return ParseConstToken(cursor, end, kJSONFalseString);
case '[':
++(cursor->pos);
*token = kArrayBegin;
return Error::kNoError;
case ']':
++(cursor->pos);
*token = kArrayEnd;
return Error::kNoError;
case ',':
++(cursor->pos);
*token = kListSeparator;
return Error::kNoError;
case '{':
++(cursor->pos);
*token = kObjectBegin;
return Error::kNoError;
case '}':
++(cursor->pos);
*token = kObjectEnd;
return Error::kNoError;
case ':':
++(cursor->pos);
*token = kObjectPairSeparator;
return Error::kNoError;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case '-':
*token = kNumber;
return ParseNumberToken(cursor, end);
case '"':
*token = kStringLiteral;
return ParseStringToken(cursor, end);
}
return Error::kSyntaxError;
}
template <typename CharType>
inline int HexToInt(CharType c) {
if ('0' <= c && c <= '9')
return c - '0';
if ('A' <= c && c <= 'F')
return c - 'A' + 10;
if ('a' <= c && c <= 'f')
return c - 'a' + 10;
NOTREACHED();
return 0;
}
template <typename CharType>
Error DecodeString(Cursor<CharType>* cursor,
const CharType* end,
String* output) {
if (cursor->pos + 1 > end - 1)
return Error::kSyntaxError;
if (cursor->pos + 1 == end - 1) {
*output = "";
return Error::kNoError;
}
const CharType* string_start = cursor->pos;
StringBuilder buffer;
buffer.ReserveCapacity(static_cast<wtf_size_t>(end - cursor->pos - 2));
cursor->pos++;
while (cursor->pos < end - 1) {
UChar c = *(cursor->pos)++;
if (c == '\n') {
cursor->line++;
cursor->line_start = cursor->pos;
}
if ('\\' != c) {
buffer.Append(c);
continue;
}
if (cursor->pos == end - 1)
return Error::kInvalidEscape;
c = *(cursor->pos)++;
if (c == 'x') {
// \x is not supported.
return Error::kInvalidEscape;
}
switch (c) {
case '"':
case '/':
case '\\':
break;
case 'b':
c = '\b';
break;
case 'f':
c = '\f';
break;
case 'n':
c = '\n';
break;
case 'r':
c = '\r';
break;
case 't':
c = '\t';
break;
case 'v':
c = '\v';
break;
case 'u':
c = (HexToInt(*(cursor->pos)) << 12) +
(HexToInt(*(cursor->pos + 1)) << 8) +
(HexToInt(*(cursor->pos + 2)) << 4) + HexToInt(*(cursor->pos + 3));
cursor->pos += 4;
break;
default:
return Error::kInvalidEscape;
}
buffer.Append(c);
}
*output = buffer.ToString();
// Validate constructed utf16 string.
if (output->Utf8(kStrictUTF8Conversion).IsNull()) {
cursor->pos = string_start;
return Error::kUnsupportedEncoding;
}
return Error::kNoError;
}
template <typename CharType>
Error BuildValue(Cursor<CharType>* cursor,
const CharType* end,
int max_depth,
std::unique_ptr<JSONValue>* result) {
if (max_depth == 0)
return Error::kTooMuchNesting;
Cursor<CharType> token_start;
Token token;
Error error = ParseToken(cursor, end, &token, &token_start);
if (error != Error::kNoError)
return error;
switch (token) {
case kNullToken:
*result = JSONValue::Null();
break;
case kBoolTrue:
*result = JSONBasicValue::Create(true);
break;
case kBoolFalse:
*result = JSONBasicValue::Create(false);
break;
case kNumber: {
bool ok;
double value = CharactersToDouble(token_start.pos,
cursor->pos - token_start.pos, &ok);
if (Decimal::FromDouble(value).IsInfinity())
ok = false;
if (!ok) {
*cursor = token_start;
return Error::kSyntaxError;
}
if (base::IsValueInRangeForNumericType<int>(value) &&
static_cast<int>(value) == value)
*result = JSONBasicValue::Create(static_cast<int>(value));
else
*result = JSONBasicValue::Create(value);
break;
}
case kStringLiteral: {
String value;
error = DecodeString(&token_start, cursor->pos, &value);
if (error != Error::kNoError) {
*cursor = token_start;
return error;
}
*result = JSONString::Create(value);
break;
}
case kArrayBegin: {
std::unique_ptr<JSONArray> array = JSONArray::Create();
Cursor<CharType> before_token = *cursor;
error = ParseToken(cursor, end, &token, &token_start);
if (error != Error::kNoError)
return error;
while (token != kArrayEnd) {
*cursor = before_token;
std::unique_ptr<JSONValue> array_node;
error = BuildValue(cursor, end, max_depth - 1, &array_node);
if (error != Error::kNoError)
return error;
array->PushValue(std::move(array_node));
// After a list value, we expect a comma or the end of the list.
error = ParseToken(cursor, end, &token, &token_start);
if (error != Error::kNoError)
return error;
if (token == kListSeparator) {
before_token = *cursor;
error = ParseToken(cursor, end, &token, &token_start);
if (error != Error::kNoError)
return error;
if (token == kArrayEnd) {
*cursor = token_start;
return Error::kUnexpectedToken;
}
} else if (token != kArrayEnd) {
// Unexpected value after list value. Bail out.
*cursor = token_start;
return Error::kUnexpectedToken;
}
}
if (token != kArrayEnd) {
*cursor = token_start;
return Error::kUnexpectedToken;
}
*result = std::move(array);
break;
}
case kObjectBegin: {
std::unique_ptr<JSONObject> object = JSONObject::Create();
error = ParseToken(cursor, end, &token, &token_start);
if (error != Error::kNoError)
return error;
while (token != kObjectEnd) {
if (token != kStringLiteral) {
*cursor = token_start;
return Error::kUnexpectedToken;
}
String key;
error = DecodeString(&token_start, cursor->pos, &key);
if (error != Error::kNoError) {
*cursor = token_start;
return error;
}
error = ParseToken(cursor, end, &token, &token_start);
if (token != kObjectPairSeparator) {
*cursor = token_start;
return Error::kUnexpectedToken;
}
std::unique_ptr<JSONValue> value;
error = BuildValue(cursor, end, max_depth - 1, &value);
if (error != Error::kNoError)
return error;
object->SetValue(key, std::move(value));
// After a key/value pair, we expect a comma or the end of the
// object.
error = ParseToken(cursor, end, &token, &token_start);
if (error != Error::kNoError)
return error;
if (token == kListSeparator) {
error = ParseToken(cursor, end, &token, &token_start);
if (error != Error::kNoError)
return error;
if (token == kObjectEnd) {
*cursor = token_start;
return Error::kUnexpectedToken;
}
} else if (token != kObjectEnd) {
// Unexpected value after last object value. Bail out.
*cursor = token_start;
return Error::kUnexpectedToken;
}
}
if (token != kObjectEnd) {
*cursor = token_start;
return Error::kUnexpectedToken;
}
*result = std::move(object);
break;
}
default:
// We got a token that's not a value.
*cursor = token_start;
return Error::kUnexpectedToken;
}
return SkipWhitespaceAndComments(cursor, end);
}
template <typename CharType>
JSONParseError ParseJSONInternal(const CharType* start_ptr,
unsigned length,
int max_depth,
std::unique_ptr<JSONValue>* result) {
Cursor<CharType> cursor;
cursor.pos = start_ptr;
cursor.line = 0;
cursor.line_start = start_ptr;
const CharType* end = start_ptr + length;
JSONParseError error;
error.type = BuildValue(&cursor, end, max_depth, result);
error.line = cursor.line;
error.column = static_cast<int>(cursor.pos - cursor.line_start);
if (error.type != Error::kNoError) {
*result = nullptr;
} else if (cursor.pos != end) {
error.type = Error::kUnexpectedDataAfterRoot;
*result = nullptr;
}
return error;
}
} // anonymous namespace
std::unique_ptr<JSONValue> ParseJSON(const String& json,
JSONParseError* opt_error) {
return ParseJSON(json, kMaxStackLimit, opt_error);
}
std::unique_ptr<JSONValue> ParseJSON(const String& json,
int max_depth,
JSONParseError* opt_error) {
if (max_depth < 0)
max_depth = 0;
if (max_depth > kMaxStackLimit)
max_depth = kMaxStackLimit;
std::unique_ptr<JSONValue> result;
JSONParseError error;
if (json.IsEmpty()) {
error.type = Error::kSyntaxError;
error.line = 0;
error.column = 0;
} else if (json.Is8Bit()) {
error = ParseJSONInternal(json.Characters8(), json.length(), max_depth,
&result);
} else {
error = ParseJSONInternal(json.Characters16(), json.length(), max_depth,
&result);
}
if (opt_error) {
error.line++;
error.column++;
if (error.type != Error::kNoError)
error.message = FormatErrorMessage(error.type, error.line, error.column);
*opt_error = error;
}
return result;
}
} // namespace blink