// blob: 543f24c678ddf4fa6f55afb51c7f0136e836bfa2 [file] [log] [blame]
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_JSON_JSON_PARSER_H_
#define V8_JSON_JSON_PARSER_H_
#include "include/v8-callbacks.h"
#include "src/base/small-vector.h"
#include "src/base/strings.h"
#include "src/common/high-allocation-throughput-scope.h"
#include "src/execution/isolate.h"
#include "src/heap/factory.h"
#include "src/objects/objects.h"
#include "src/zone/zone-containers.h"
namespace v8 {
namespace internal {
// Two-state result code: an element was either found or not found.
// NOTE(review): nothing else in this header refers to this enum; confirm which
// callers still depend on it before changing it.
enum ParseElementResult {
  kElementFound,
  kElementNotFound,
};
// Immutable, compact descriptor of a string seen while scanning JSON. An
// instance is in exactly one of two forms, selected by is_index():
//   * index form: only index() is meaningful;
//   * range form: start(), length() and the three flags needs_conversion(),
//     internalize() and has_escape() are meaningful.
// Every accessor other than is_index() DCHECKs that it is called on the
// matching form.
class JsonString final {
 public:
  // Range form with start == 0, length == 0 and every flag cleared. Note that
  // internalize() is false here, unlike a zero-length string built through the
  // five-argument constructor.
  JsonString()
      : start_(0),
        length_(0),
        needs_conversion_(false),
        internalize_(false),
        has_escape_(false),
        is_index_(false) {}

  // Index form holding |index|; length and all flags are zeroed.
  explicit JsonString(uint32_t index)
      : index_(index),
        length_(0),
        needs_conversion_(false),
        internalize_(false),
        has_escape_(false),
        is_index_(true) {}

  // Range form. The internalize flag is set when the caller asks for it via
  // |needs_internalization| or when |length| does not exceed
  // kMaxInternalizedStringValueLength.
  JsonString(int start, int length, bool needs_conversion,
             bool needs_internalization, bool has_escape)
      : start_(start),
        length_(length),
        needs_conversion_(needs_conversion),
        internalize_(needs_internalization ||
                     length <= kMaxInternalizedStringValueLength),
        has_escape_(has_escape),
        is_index_(false) {}

  // True for the index form, false for the range form.
  bool is_index() const { return is_index_; }

  // Index form only.
  uint32_t index() const {
    DCHECK(is_index());
    return index_;
  }

  // Range form only: start and length as passed to the constructor.
  int start() const {
    DCHECK(!is_index());
    return start_;
  }
  int length() const {
    DCHECK(!is_index());
    return length_;
  }

  // Range form only: flags as computed by the constructor.
  bool needs_conversion() const {
    DCHECK(!is_index());
    return needs_conversion_;
  }
  bool internalize() const {
    DCHECK(!is_index());
    return internalize_;
  }
  bool has_escape() const {
    DCHECK(!is_index());
    return has_escape_;
  }

 private:
  // Range-form strings up to this length always get the internalize flag.
  static const int kMaxInternalizedStringValueLength = 10;

  // start_ is the active member in range form, index_ in index form.
  union {
    const int start_;
    const uint32_t index_;
  };
  const int length_;
  const bool needs_conversion_ : 1;
  const bool internalize_ : 1;
  const bool has_escape_ : 1;
  const bool is_index_ : 1;
};
// Pairs a scanned key (as a JsonString) with a handle to its value.
struct JsonProperty {
  // Must never actually run: reaching it hits UNREACHABLE().
  // NOTE(review): presumably declared only so that containers requiring a
  // default constructor (e.g. the SmallVector<JsonProperty> used by the
  // parser) compile -- confirm.
  JsonProperty() { UNREACHABLE(); }

  // Stores a copy of |string|; |value| is left default-constructed for the
  // caller to fill in.
  explicit JsonProperty(const JsonString& string) : string{string} {}

  JsonString string;
  Handle<Object> value;
};
class JsonParseInternalizer {
public:
static MaybeHandle<Object> Internalize(Isolate* isolate,
Handle<Object> object,
Handle<Object> reviver);
private:
JsonParseInternalizer(Isolate* isolate, Handle<JSReceiver> reviver)
: isolate_(isolate), reviver_(reviver) {}
MaybeHandle<Object> InternalizeJsonProperty(Handle<JSReceiver> holder,
Handle<String> key);
bool RecurseAndApply(Handle<JSReceiver> holder, Handle<String> name);
Isolate* isolate_;
Handle<JSReceiver> reviver_;
};
// Token kinds used by JsonParser (see peek()/Expect()/Check()). Stored in a
// single byte. The mapping from input characters to tokens is established by
// the scanner implementation, which is not part of this header.
enum class JsonToken : uint8_t {
  // Scalar values.
  NUMBER,
  STRING,

  // Object and array delimiters.
  LBRACE,
  RBRACE,
  LBRACK,
  RBRACK,

  // Keyword literals.
  TRUE_LITERAL,
  FALSE_LITERAL,
  NULL_LITERAL,

  // Separators and insignificant input.
  WHITESPACE,
  COLON,
  COMMA,

  // ILLEGAL: no valid token. EOS: end of the source string (also reported by
  // ScanLiteral when the input ends in the middle of a literal).
  ILLEGAL,
  EOS,
};
// A simple JSON parser, templated on the character type |Char| of the source
// string. The only public entry point is the static Parse(); everything else
// is scanner/parser state and helpers. The constructor, destructor and most
// parsing routines are defined out of line.
template <typename Char>
class JsonParser final {
 public:
  using SeqString = typename CharTraits<Char>::String;
  using SeqExternalString = typename CharTraits<Char>::ExternalString;

  // Parses |source| as a single JSON value. If parsing fails the failure is
  // propagated through ASSIGN_RETURN_ON_EXCEPTION. Otherwise, when |reviver|
  // is callable the parsed value is passed through
  // JsonParseInternalizer::Internalize and that result is returned; when it
  // is not callable the parsed value is returned unchanged.
  V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Parse(
      Isolate* isolate, Handle<String> source, Handle<Object> reviver) {
    HighAllocationThroughputScope high_throughput_scope(
        V8::GetCurrentPlatform());
    Handle<Object> result;
    ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
                               JsonParser(isolate, source).ParseJson(), Object);
    if (!reviver->IsCallable()) return result;
    return JsonParseInternalizer::Internalize(isolate, result, reviver);
  }

  // Sentinel returned by CurrentCharacter() once the cursor has reached the
  // end of the input.
  static constexpr base::uc32 kEndOfString = static_cast<base::uc32>(-1);
  // Sentinel for an invalid unicode character; has the same numeric value as
  // kEndOfString.
  static constexpr base::uc32 kInvalidUnicodeCharacter =
      static_cast<base::uc32>(-1);

 private:
  template <typename T>
  using SmallVector = base::SmallVector<T, 16>;

  // Bookkeeping record for one pending step of the parse. Owns a HandleScope
  // that is opened when the record is constructed.
  struct JsonContinuation {
    enum Type : uint8_t { kReturn, kObjectProperty, kArrayElement };

    // |index| is narrowed to uint32_t and then stored in a 30-bit field, so
    // larger values are silently truncated. max_index and elements start at 0.
    JsonContinuation(Isolate* isolate, Type type, size_t index)
        : scope(isolate),
          type_(type),
          index(static_cast<uint32_t>(index)),
          max_index(0),
          elements(0) {}

    Type type() const { return static_cast<Type>(type_); }
    void set_type(Type type) { type_ = static_cast<uint8_t>(type); }

    HandleScope scope;
    // Unfortunately GCC doesn't like packing Type in two bits, so the type is
    // kept as a raw 2-bit unsigned field and converted in the accessors.
    uint32_t type_ : 2;
    uint32_t index : 30;
    uint32_t max_index;
    uint32_t elements;
  };

  JsonParser(Isolate* isolate, Handle<String> source);
  ~JsonParser();

  // Parse a string containing a single JSON value.
  MaybeHandle<Object> ParseJson();

  // Moves the cursor forward by one character. Does not touch next_.
  void advance() { ++cursor_; }

  // Character under the cursor, or kEndOfString when the cursor is at end_.
  base::uc32 CurrentCharacter() {
    if (V8_LIKELY(!is_at_end())) return *cursor_;
    return kEndOfString;
  }

  // Steps the cursor by one and returns the character now under it (or
  // kEndOfString).
  base::uc32 NextCharacter() {
    advance();
    return CurrentCharacter();
  }

  void AdvanceToNonDecimal();

  // The current token, as cached in next_.
  V8_INLINE JsonToken peek() const { return next_; }

  // Steps past the current token, which must be |token| (checked in debug
  // builds only).
  void Consume(JsonToken token) {
    DCHECK_EQ(peek(), token);
    advance();
  }

  // Steps past the current token if it is |token|; otherwise reports the
  // current token as unexpected and leaves the cursor where it is.
  void Expect(JsonToken token) {
    if (V8_UNLIKELY(peek() != token)) {
      ReportUnexpectedToken(peek());
      return;
    }
    advance();
  }

  // Same as Expect(), after skipping whitespace.
  void ExpectNext(JsonToken token) {
    SkipWhitespace();
    Expect(token);
  }

  // Skips whitespace, then steps past the current token and returns true if
  // it is |token|; returns false without advancing otherwise.
  bool Check(JsonToken token) {
    SkipWhitespace();
    const bool matched = (peek() == token);
    if (matched) advance();
    return matched;
  }

  // Scans the remainder of the keyword literal |s|. The caller has already
  // matched the first character of |s| against the character under the
  // cursor, which is not consumed yet. |s| is an array of N chars whose last
  // element is not compared, so the literal text has N - 1 characters, N - 2
  // of which are still to be checked.
  //
  // On success the cursor ends up just past the literal. On failure either
  // the first mismatching character is reported (cursor left on it), or, if
  // the input ended while everything so far matched, an unexpected EOS token
  // is reported.
  template <size_t N>
  void ScanLiteral(const char (&s)[N]) {
    DCHECK(!is_at_end());
    // There's at least 1 character: we always consume it and then compare the
    // following ones.
    STATIC_ASSERT(N > 2);
    const size_t remaining = static_cast<size_t>(end_ - cursor_);

    // Fast path: enough input left and the tail of the literal matches.
    if (V8_LIKELY(remaining >= N - 1 &&
                  CompareCharsEqual(s + 1, cursor_ + 1, N - 2))) {
      cursor_ += N - 1;
      return;
    }

    // Slow path: walk character by character to find what to report.
    ++cursor_;
    const size_t limit = std::min(N - 2, remaining - 1);
    for (size_t i = 0; i < limit; ++i, ++cursor_) {
      if (s[1 + i] != *cursor_) {
        ReportUnexpectedCharacter(*cursor_);
        return;
      }
    }

    // No mismatch within the available input, so the input was too short.
    DCHECK(is_at_end());
    ReportUnexpectedToken(JsonToken::EOS);
  }

  // The JSON lexical grammar is specified in the ECMAScript 5 standard,
  // section 15.12.1.1. The only allowed whitespace characters between tokens
  // are tab, carriage-return, newline and space.
  void SkipWhitespace();

  // A JSON string (production JSONString) is a subset of valid JavaScript
  // string literals. The string must only be double-quoted (not
  // single-quoted), and the only allowed backslash-escapes are ", /, \, b, f,
  // n, r, t and four-digit hex escapes (uXXXX). Any other use of backslashes
  // is invalid.
  JsonString ScanJsonString(bool needs_internalization);
  JsonString ScanJsonPropertyKey(JsonContinuation* cont);
  base::uc32 ScanUnicodeCharacter();
  Handle<String> MakeString(const JsonString& string,
                            Handle<String> hint = Handle<String>());

  template <typename SinkChar>
  void DecodeString(SinkChar* sink, int start, int length);

  template <typename SinkSeqString>
  Handle<String> DecodeString(const JsonString& string,
                              Handle<SinkSeqString> intermediate,
                              Handle<String> hint);

  // A JSON number (production JSONNumber) is a subset of the valid JavaScript
  // decimal number literals.
  // It includes an optional minus sign, must have at least one
  // digit before and after a decimal point, may not have prefixed zeros
  // (unless the integer part is zero), and may include an exponent part
  // (e.g., "e-10"). Hexadecimal and octal numbers are not allowed.
  Handle<Object> ParseJsonNumber();

  // Parse a single JSON value from input (grammar production JSONValue).
  // A JSON value is either a (double-quoted) string literal, a number literal,
  // one of "true", "false", or "null", or an object or array literal.
  MaybeHandle<Object> ParseJsonValue();

  Handle<Object> BuildJsonObject(
      const JsonContinuation& cont,
      const SmallVector<JsonProperty>& property_stack, Handle<Map> feedback);
  Handle<Object> BuildJsonArray(
      const JsonContinuation& cont,
      const SmallVector<Handle<Object>>& element_stack);

  // Mark that a parsing error has happened at the current character.
  void ReportUnexpectedCharacter(base::uc32 c);

  // Mark that a parsing error has happened at the current token.
  void ReportUnexpectedToken(JsonToken token);

  Isolate* isolate() { return isolate_; }
  Factory* factory() { return isolate_->factory(); }
  Handle<JSFunction> object_constructor() { return object_constructor_; }

  static const int kInitialSpecialStringLength = 32;

  // Trampoline with a void* signature: |parser| must point at a
  // JsonParser<Char>, whose UpdatePointers() is then invoked.
  static void UpdatePointersCallback(void* parser) {
    static_cast<JsonParser<Char>*>(parser)->UpdatePointers();
  }

  // Re-reads the character pointer of source_ and, if it differs from the
  // cached chars_, rebases chars_, cursor_ and end_ onto it while keeping the
  // cursor offset and total length unchanged.
  void UpdatePointers() {
    DisallowGarbageCollection no_gc;
    const Char* fresh_chars = Handle<SeqString>::cast(source_)->GetChars(no_gc);
    if (fresh_chars == chars_) return;

    const size_t cursor_offset = cursor_ - chars_;
    const size_t total_length = end_ - chars_;
    chars_ = fresh_chars;
    cursor_ = fresh_chars + cursor_offset;
    end_ = fresh_chars + total_length;
  }

 private:
  static const bool kIsOneByte = sizeof(Char) == 1;

  // True once the cursor has reached end_; the cursor must never pass it.
  bool is_at_end() const {
    DCHECK_LE(cursor_, end_);
    return cursor_ == end_;
  }

  // Offset of the cursor from chars_, in characters.
  int position() const { return static_cast<int>(cursor_ - chars_); }

  Isolate* isolate_;
  const uint64_t hash_seed_;
  // Token returned by peek().
  JsonToken next_;
  // Indicates whether the bytes underneath source_ can relocate during GC.
  bool chars_may_relocate_;

  Handle<JSFunction> object_constructor_;
  const Handle<String> original_source_;
  Handle<String> source_;

  // Cached pointers into the raw chars of source_. In case the source is
  // on-heap, an UpdatePointers callback is registered. For this reason,
  // chars_, cursor_ and end_ should never be locally cached across a possible
  // allocation. The scope in which chars are cached has to be guarded by a
  // DisallowGarbageCollection scope.
  const Char* cursor_;
  const Char* end_;
  const Char* chars_;
};
// Explicit instantiation declarations. These tell every translation unit that
// includes this header that JsonParser<uint8_t> and JsonParser<uint16_t> are
// explicitly instantiated somewhere else, so includers do not instantiate the
// out-of-line members themselves. The matching explicit instantiation
// definitions are not in this header (presumably they live in the parser's
// implementation file -- confirm).
extern template class JsonParser<uint8_t>;
extern template class JsonParser<uint16_t>;
} // namespace internal
} // namespace v8
#endif // V8_JSON_JSON_PARSER_H_