blob: 24a9149a6194e96e17b8483a885c6bbe58c4fb5e [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "binary_encoding.h"
#include <cassert>
#include <limits>
#include "json_parser_handler.h"
namespace inspector_protocol {
namespace {
// The major types from RFC 7049 Section 2.1.
enum class MajorType {
UNSIGNED = 0,
NEGATIVE = 1,
BYTE_STRING = 2,
STRING = 3,
ARRAY = 4,
MAP = 5,
TAG = 6,
SIMPLE_VALUE = 7
};
// Indicates the number of bits the "initial byte" needs to be shifted to the
// right after applying |kMajorTypeMask| to produce the major type in the
// lowermost bits.
static constexpr uint8_t kMajorTypeBitShift = 5u;
// Mask selecting the low-order 5 bits of the "initial byte", which is where
// the additional information is encoded.
static constexpr uint8_t kAdditionalInformationMask = 0x1f;
// Mask selecting the high-order 3 bits of the "initial byte", which indicates
// the major type of the encoded value.
static constexpr uint8_t kMajorTypeMask = 0xe0;
// Indicates the integer is in the following byte.
static constexpr uint8_t kAdditionalInformation1Byte = 24u;
// Indicates the integer is in the next 2 bytes.
static constexpr uint8_t kAdditionalInformation2Bytes = 25u;
// Indicates the integer is in the next 4 bytes.
static constexpr uint8_t kAdditionalInformation4Bytes = 26u;
// Indicates the integer is in the next 8 bytes.
static constexpr uint8_t kAdditionalInformation8Bytes = 27u;
// Encodes the initial byte, consisting of the |type| in the first 3 bits
// followed by 5 bits of |additional_info|.
constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) {
return (uint8_t(type) << kMajorTypeBitShift) |
(additional_info & kAdditionalInformationMask);
}
// See RFC 7049 Section 2.3, Table 2.
static constexpr uint8_t kEncodedTrue =
EncodeInitialByte(MajorType::SIMPLE_VALUE, 21);
static constexpr uint8_t kEncodedFalse =
EncodeInitialByte(MajorType::SIMPLE_VALUE, 20);
static constexpr uint8_t kEncodedNull =
EncodeInitialByte(MajorType::SIMPLE_VALUE, 22);
static constexpr uint8_t kInitialByteForDouble =
EncodeInitialByte(MajorType::SIMPLE_VALUE, 27);
// See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional
// info = 31.
static constexpr uint8_t kInitialByteIndefiniteLengthArray =
EncodeInitialByte(MajorType::ARRAY, 31);
static constexpr uint8_t kInitialByteIndefiniteLengthMap =
EncodeInitialByte(MajorType::MAP, 31);
// See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite
// length maps / arrays.
static constexpr uint8_t kStopByte =
EncodeInitialByte(MajorType::SIMPLE_VALUE, 31);
// When parsing binary (CBOR), we limit recursion depth for objects and arrays
// to this constant.
static constexpr int kStackLimit = 1000;
// Writes the bytes for |v| to |out|, starting with the most significant byte.
// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
template <typename T>
void WriteBytesMostSignificantByteFirst(T v, std::vector<uint8_t>* out) {
for (int shift_bytes = sizeof(T) - 1; shift_bytes >= 0; --shift_bytes)
out->push_back(0xff & v >> (shift_bytes * 8));
}
// Writes the start of an item with |type|. The |value| may indicate the size,
// or it may be the payload if the value is an unsigned integer.
void WriteItemStart(MajorType type, uint64_t value,
std::vector<uint8_t>* encoded) {
if (value < 24) {
// Values 0-23 are encoded directly into the additional info of the
// initial byte.
encoded->push_back(EncodeInitialByte(type, /*additiona_info=*/value));
return;
}
if (value <= std::numeric_limits<uint8_t>::max()) {
// Values 24-255 are encoded with one initial byte, followed by the value.
encoded->reserve(encoded->size() + 1 + sizeof(uint8_t));
encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte));
encoded->push_back(value);
return;
}
if (value <= std::numeric_limits<uint16_t>::max()) {
// Values 256-65535: 1 initial byte + 2 bytes payload.
encoded->reserve(encoded->size() + 1 + sizeof(uint16_t));
encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes));
WriteBytesMostSignificantByteFirst<uint16_t>(value, encoded);
return;
}
if (value <= std::numeric_limits<uint32_t>::max()) {
// 32 bit uint: 1 initial byte + 4 bytes payload.
encoded->reserve(encoded->size() + 1 + sizeof(uint32_t));
encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes));
WriteBytesMostSignificantByteFirst<uint32_t>(value, encoded);
return;
}
// 64 bit uint: 1 initial byte + 8 bytes payload.
encoded->reserve(encoded->size() + 1 + sizeof(uint64_t));
encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes));
WriteBytesMostSignificantByteFirst<uint64_t>(value, encoded);
}
// Extracts sizeof(T) bytes from |in| to extract a value of type T
// (e.g. uint64_t, uint32_t, ...), most significant byte first.
// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
template <typename T>
T ReadBytesMostSignificantByteFirst(span<uint8_t> in) {
assert(size_t(in.size()) >= sizeof(T));
T result = 0;
for (size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes)
result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8);
return result;
}
// Reads the start of an item with definitive size from |in|.
// |type| is the major type as specified in RFC 7049 Section 2.1.
// |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size
// (e.g. for BYTE_STRING). Remainder is the first byte after
// the item start, that is, the first byte after the encoded value / size.
// Returns true iff successful.
bool ReadItemStart(span<uint8_t>* bytes, MajorType* type, uint64_t* value) {
if (bytes->empty()) return false;
uint8_t initial_byte = (*bytes)[0];
*type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift);
uint8_t additional_information = initial_byte & kAdditionalInformationMask;
if (additional_information < 24) {
// Values 0-23 are encoded directly into the additional info of the
// initial byte.
*value = additional_information;
*bytes = bytes->subspan(1);
return true;
}
if (additional_information == kAdditionalInformation1Byte) {
// Values 24-255 are encoded with one initial byte, followed by the value.
if (bytes->size() < 2) return false;
*value = ReadBytesMostSignificantByteFirst<uint8_t>(bytes->subspan(1));
*bytes = bytes->subspan(2);
return true;
}
if (additional_information == kAdditionalInformation2Bytes) {
// Values 256-65535: 1 initial byte + 2 bytes payload.
if (static_cast<size_t>(bytes->size()) < 1 + sizeof(uint16_t)) return false;
*value = ReadBytesMostSignificantByteFirst<uint16_t>(bytes->subspan(1));
*bytes = bytes->subspan(1 + sizeof(uint16_t));
return true;
}
if (additional_information == kAdditionalInformation4Bytes) {
// 32 bit uint: 1 initial byte + 4 bytes payload.
if (static_cast<size_t>(bytes->size()) < 1 + sizeof(uint32_t)) return false;
*value = ReadBytesMostSignificantByteFirst<uint32_t>(bytes->subspan(1));
*bytes = bytes->subspan(1 + sizeof(uint32_t));
return true;
}
if (additional_information == kAdditionalInformation8Bytes) {
// 64 bit uint: 1 initial byte + 8 bytes payload.
if (static_cast<size_t>(bytes->size()) < 1 + sizeof(uint64_t)) return false;
*value = ReadBytesMostSignificantByteFirst<uint64_t>(bytes->subspan(1));
*bytes = bytes->subspan(1 + sizeof(uint64_t));
return true;
}
return false;
}
} // namespace
void EncodeUnsigned(uint64_t value, std::vector<uint8_t>* out) {
WriteItemStart(MajorType::UNSIGNED, value, out);
}
bool DecodeUnsigned(span<uint8_t>* bytes, uint64_t* value) {
MajorType type;
span<uint8_t> internal_bytes = *bytes;
uint64_t internal_value;
if (!ReadItemStart(&internal_bytes, &type, &internal_value)) return false;
if (type != MajorType::UNSIGNED) return false;
*bytes = internal_bytes;
*value = internal_value;
return true;
}
namespace internal {
// The following two routines implement encoding / decoding for NEGATIVE,
// Major Type 1 (see RFC 7049, Section 2.1). However, we will (thus far)
// only be using int32_t values anyway, so the method exposed in the
// header instead EncodeSigned (below), which dispatches between
// UNSIGNED and NEGATIVE.
// Encodes |value| as NEGATIVE (major type 1). |value| must be negative.
void EncodeNegative(int64_t value, std::vector<uint8_t>* out) {
assert(value < 0);
WriteItemStart(MajorType::NEGATIVE, static_cast<uint64_t>(-(value + 1)), out);
}
// Decodes |value| from |bytes|, assuming that it's encoded as NEGATIVE.
// (major type 1). Iff successful, updates |bytes| to position after
// the negative int and returns true.
bool DecodeNegative(span<uint8_t>* bytes, int64_t* value) {
MajorType type;
span<uint8_t> internal_bytes = *bytes;
uint64_t encoded_value;
if (!ReadItemStart(&internal_bytes, &type, &encoded_value)) return false;
if (type != MajorType::NEGATIVE) return false;
*value = -static_cast<int64_t>(encoded_value) - 1;
*bytes = internal_bytes;
return true;
}
} // namespace internal
void EncodeSigned(int32_t value, std::vector<uint8_t>* out) {
if (value >= 0)
EncodeUnsigned(value, out);
else
internal::EncodeNegative(value, out);
}
bool DecodeSigned(span<uint8_t>* bytes, int32_t* value) {
MajorType type;
span<uint8_t> internal_bytes = *bytes;
uint64_t encoded_value;
if (!ReadItemStart(&internal_bytes, &type, &encoded_value)) return false;
// It's unfortunate that we're rejecting perfectly fine CBOR encoded UNSIGNED
// / NEGATIVE values here if they're outside the range of int32_t. This is
// (for now) for compatibility with JSON, or more specifically with what our
// parser supports via JsonParserHandler::HandleInt.
if (type == MajorType::UNSIGNED) {
if (encoded_value <= std::numeric_limits<int32_t>::max()) {
*value = encoded_value;
*bytes = internal_bytes;
return true;
}
} else if (type == MajorType::NEGATIVE) {
int64_t decoded_value = -static_cast<int64_t>(encoded_value) - 1;
if (decoded_value >= std::numeric_limits<int32_t>::min()) {
*value = decoded_value;
*bytes = internal_bytes;
return true;
}
}
return false;
}
void EncodeUTF16String(span<uint16_t> in, std::vector<uint8_t>* out) {
WriteItemStart(MajorType::BYTE_STRING, static_cast<uint64_t>(in.size_bytes()),
out);
// When emitting UTF16 characters, we always write the least significant byte
// first; this is because it's the native representation for X86.
// TODO(johannes): Implement a more efficient thing here later, e.g.
// casting *iff* the machine has this byte order.
// The wire format for UTF16 chars will probably remain the same
// (least significant byte first) since this way we can have
// golden files, unittests, etc. that port easily and universally.
// See also:
// https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
for (const uint16_t two_bytes : in) {
out->push_back(two_bytes);
out->push_back(two_bytes >> 8);
}
}
bool DecodeUTF16String(span<uint8_t>* bytes, std::vector<uint16_t>* str) {
MajorType type;
uint64_t num_bytes;
span<uint8_t> internal_bytes = *bytes; // only written upon success
if (!ReadItemStart(&internal_bytes, &type, &num_bytes)) return false;
if (type != MajorType::BYTE_STRING) return false;
if (static_cast<size_t>(internal_bytes.size()) < num_bytes) return false;
// Must be divisible by 2 since UTF16 is 2 bytes per character.
if (num_bytes & 1) return false;
assert(str->empty());
str->reserve(num_bytes);
for (size_t ii = 0; ii < num_bytes; ii += 2) {
uint16_t high_byte = internal_bytes[ii + 1];
uint16_t low_byte = internal_bytes[ii];
str->push_back((high_byte << 8) | low_byte);
}
*bytes = internal_bytes.subspan(num_bytes);
return true;
}
// A double is encoded with a specific initial byte
// (kInitialByteForDouble) plus the 64 bits of payload for its value.
constexpr int kEncodedDoubleSize = 1 + sizeof(uint64_t);
void EncodeDouble(double value, std::vector<uint8_t>* out) {
// The additional_info=27 indicates 64 bits for the double follow.
// See RFC 7049 Section 2.3, Table 1.
out->reserve(out->size() + kEncodedDoubleSize);
out->push_back(kInitialByteForDouble);
union {
double from_double;
uint64_t to_uint64;
} reinterpret;
reinterpret.from_double = value;
WriteBytesMostSignificantByteFirst<uint64_t>(reinterpret.to_uint64, out);
}
bool DecodeDouble(span<uint8_t>* bytes, double* value) {
if (bytes->size() < kEncodedDoubleSize) return false;
if ((*bytes)[0] != kInitialByteForDouble) return false;
union {
uint64_t from_uint64;
double to_double;
} reinterpret;
reinterpret.from_uint64 =
ReadBytesMostSignificantByteFirst<uint64_t>(bytes->subspan(1));
*value = reinterpret.to_double;
*bytes = bytes->subspan(kEncodedDoubleSize);
return true;
}
namespace {
class JsonToBinaryEncoder : public JsonParserHandler {
public:
JsonToBinaryEncoder(std::vector<uint8_t>* out, Status* status)
: out_(out), status_(status) {
*status_ = Status();
}
void HandleObjectBegin() override {
out_->push_back(kInitialByteIndefiniteLengthMap);
}
void HandleObjectEnd() override { out_->push_back(kStopByte); };
void HandleArrayBegin() override {
out_->push_back(kInitialByteIndefiniteLengthArray);
}
void HandleArrayEnd() override { out_->push_back(kStopByte); };
void HandleString(std::vector<uint16_t> chars) override {
EncodeUTF16String(span<uint16_t>(chars.data(), chars.size()), out_);
}
void HandleDouble(double value) override { EncodeDouble(value, out_); };
void HandleInt(int32_t value) override { EncodeSigned(value, out_); }
void HandleBool(bool value) override {
// See RFC 7049 Section 2.3, Table 2.
out_->push_back(value ? kEncodedTrue : kEncodedFalse);
}
void HandleNull() override {
// See RFC 7049 Section 2.3, Table 2.
out_->push_back(kEncodedNull);
}
void HandleError(Status error) override {
assert(!error.ok());
*status_ = error;
out_->clear();
}
private:
std::vector<uint8_t>* out_;
Status* status_;
};
} // namespace
std::unique_ptr<JsonParserHandler> NewJsonToBinaryEncoder(
std::vector<uint8_t>* out, Status* status) {
return std::make_unique<JsonToBinaryEncoder>(out, status);
}
namespace {
// Below are three parsing routines for CBOR / binary, which cover enough
// to roundtrip JSON messages.
Error ParseMap(int32_t stack_depth, span<uint8_t>* bytes,
JsonParserHandler* out);
Error ParseArray(int32_t stack_depth, span<uint8_t>* bytes,
JsonParserHandler* out);
Error ParseValue(int32_t stack_depth, span<uint8_t>* bytes,
JsonParserHandler* out);
Error ParseValue(int32_t stack_depth, span<uint8_t>* bytes,
JsonParserHandler* out) {
if (stack_depth > kStackLimit)
return Error::BINARY_ENCODING_STACK_LIMIT_EXCEEDED;
if (bytes->empty()) return Error::BINARY_ENCODING_UNEXPECTED_EOF;
// First we dispatch on the entire initial byte. Only when this doesn't
// give satisfaction do we use the major types (first three bits)
// to dispatch between a few more choices below.
switch ((*bytes)[0]) {
case kEncodedTrue:
out->HandleBool(true);
*bytes = bytes->subspan(1);
return Error::OK;
case kEncodedFalse:
out->HandleBool(false);
*bytes = bytes->subspan(1);
return Error::OK;
case kEncodedNull:
out->HandleNull();
*bytes = bytes->subspan(1);
return Error::OK;
case kInitialByteForDouble: {
double value;
if (!DecodeDouble(bytes, &value))
return Error::BINARY_ENCODING_INVALID_DOUBLE;
out->HandleDouble(value);
return Error::OK;
}
case kInitialByteIndefiniteLengthArray:
return ParseArray(stack_depth + 1, bytes, out);
case kInitialByteIndefiniteLengthMap:
return ParseMap(stack_depth + 1, bytes, out);
default:
break;
}
switch ((*bytes)[0] >> kMajorTypeBitShift) {
case uint8_t(MajorType::UNSIGNED):
case uint8_t(MajorType::NEGATIVE): {
int32_t value;
if (!DecodeSigned(bytes, &value))
return Error::BINARY_ENCODING_INVALID_SIGNED;
out->HandleInt(value);
return Error::OK;
}
case uint8_t(MajorType::BYTE_STRING): {
std::vector<uint16_t> value;
if (!DecodeUTF16String(bytes, &value))
return Error::BINARY_ENCODING_INVALID_STRING16;
out->HandleString(std::move(value));
return Error::OK;
}
case uint8_t(MajorType::STRING): // utf8, todo
case uint8_t(MajorType::ARRAY): // indef length case handled above
case uint8_t(MajorType::MAP): // indef length case handled above
case uint8_t(MajorType::TAG): // todo
case uint8_t(MajorType::SIMPLE_VALUE): // supported cases handled above
default:
return Error::BINARY_ENCODING_UNSUPPORTED_VALUE;
}
}
// |bytes| must start with the indefinite length array byte, so basically,
// ParseArray may only be called after an indefinite length array has been
// detected.
Error ParseArray(int32_t stack_depth, span<uint8_t>* bytes,
JsonParserHandler* out) {
assert(!bytes->empty());
assert((*bytes)[0] == kInitialByteIndefiniteLengthArray);
if (stack_depth > kStackLimit)
return Error::BINARY_ENCODING_STACK_LIMIT_EXCEEDED;
*bytes = bytes->subspan(1);
out->HandleArrayBegin();
while (!bytes->empty()) {
// Parse end of array.
if ((*bytes)[0] == kStopByte) {
out->HandleArrayEnd();
return Error::OK;
}
// Parse value.
Error status = ParseValue(stack_depth + 1, bytes, out);
if (status != Error::OK) return status;
}
return Error::BINARY_ENCODING_UNEXPECTED_EOF;
}
// |bytes| must start with the indefinite length array byte, so basically,
// ParseArray may only be called after an indefinite length array has been
// detected.
Error ParseMap(int32_t stack_depth, span<uint8_t>* bytes,
JsonParserHandler* out) {
assert(!bytes->empty());
assert((*bytes)[0] == kInitialByteIndefiniteLengthMap);
if (stack_depth > kStackLimit)
return Error::BINARY_ENCODING_STACK_LIMIT_EXCEEDED;
*bytes = bytes->subspan(1);
out->HandleObjectBegin();
while (!bytes->empty()) {
// Parse end of map.
if ((*bytes)[0] == kStopByte) {
out->HandleObjectEnd();
return Error::OK;
}
// Parse key.
std::vector<uint16_t> key;
if (!DecodeUTF16String(bytes, &key))
return Error::BINARY_ENCODING_INVALID_MAP_KEY;
out->HandleString(std::move(key));
// Parse value.
Error status = ParseValue(stack_depth + 1, bytes, out);
if (status != Error::OK) return status;
}
return Error::BINARY_ENCODING_UNEXPECTED_EOF;
}
} // namespace
void ParseBinary(span<uint8_t> bytes, JsonParserHandler* json_out) {
if (bytes.empty()) {
json_out->HandleError(Status{Error::BINARY_ENCODING_UNEXPECTED_EOF, 0});
return;
}
if (bytes[0] != kInitialByteIndefiniteLengthMap) {
json_out->HandleError(
Status{Error::BINARY_ENCODING_INDEFINITE_LENGTH_MAP_START_EXPECTED, 0});
return;
}
span<uint8_t> internal_bytes = bytes;
Error error = ParseMap(/*stack_depth=*/1, &internal_bytes, json_out);
if (error == Error::OK) return;
json_out->HandleError(Status{error, bytes.size() - internal_bytes.size()});
}
} // namespace inspector_protocol