Add a binary parser which sends events to a JsonParserHandler.
In the unittest, roundtrip from JSON to CBOR and back.
Change-Id: I6b53bafbcf52b8d7eeaf57c383f11343609a92c3
diff --git a/BUILD.gn b/BUILD.gn
index 2c07206..c7e77c4 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -57,6 +57,7 @@
]
deps = [
":binary_encoding",
+ ":json_std_string_writer",
":linux_dev_platform",
"//testing:gtest_main",
"//third_party/gtest:gmock",
diff --git a/encoding/binary_encoding.cc b/encoding/binary_encoding.cc
index 74619ee..24a9149 100644
--- a/encoding/binary_encoding.cc
+++ b/encoding/binary_encoding.cc
@@ -69,6 +69,10 @@
static constexpr uint8_t kStopByte =
EncodeInitialByte(MajorType::SIMPLE_VALUE, 31);
+// When parsing binary (CBOR), we limit recursion depth for objects and arrays
+// to this constant.
+static constexpr int kStackLimit = 1000;
+
// Writes the bytes for |v| to |out|, starting with the most significant byte.
// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
template <typename T>
@@ -227,6 +231,32 @@
internal::EncodeNegative(value, out);
}
+// Decodes an integer item (major type UNSIGNED or NEGATIVE) from the front of
+// |bytes| into |value|. On success, stores the number in |value|, advances
+// |bytes| past the item and returns true. Returns false, leaving |bytes| and
+// |value| untouched, if ReadItemStart fails, if the item has another major
+// type, or if the number doesn't fit into int32_t.
+bool DecodeSigned(span<uint8_t>* bytes, int32_t* value) {
+  MajorType type;
+  span<uint8_t> internal_bytes = *bytes;
+  uint64_t encoded_value;
+  if (!ReadItemStart(&internal_bytes, &type, &encoded_value)) return false;
+  if (type != MajorType::UNSIGNED && type != MajorType::NEGATIVE) return false;
+  // It's unfortunate that we're rejecting perfectly fine CBOR encoded UNSIGNED
+  // / NEGATIVE values here if they're outside the range of int32_t. This is
+  // (for now) for compatibility with JSON, or more specifically with what our
+  // parser supports via JsonParserHandler::HandleInt.
+  //
+  // UNSIGNED encodes n itself and NEGATIVE encodes -1 - n, so in both cases
+  // the number fits into int32_t iff n <= INT32_MAX. The check must happen
+  // while |encoded_value| is still unsigned: converting a payload above
+  // INT64_MAX to int64_t first would wrap around (or overflow on negation),
+  // and an out-of-range NEGATIVE item would be accepted as a small number.
+  constexpr uint64_t kMaxMagnitude = std::numeric_limits<int32_t>::max();
+  if (encoded_value > kMaxMagnitude) return false;
+  const int64_t magnitude = static_cast<int64_t>(encoded_value);
+  *value = static_cast<int32_t>(
+      type == MajorType::UNSIGNED ? magnitude : -magnitude - 1);
+  *bytes = internal_bytes;
+  return true;
+}
+
void EncodeUTF16String(span<uint16_t> in, std::vector<uint8_t>* out) {
WriteItemStart(MajorType::BYTE_STRING, static_cast<uint64_t>(in.size_bytes()),
out);
@@ -296,6 +326,7 @@
return true;
}
+namespace {
class JsonToBinaryEncoder : public JsonParserHandler {
public:
JsonToBinaryEncoder(std::vector<uint8_t>* out, Status* status)
@@ -343,9 +374,153 @@
std::vector<uint8_t>* out_;
Status* status_;
};
+} // namespace
std::unique_ptr<JsonParserHandler> NewJsonToBinaryEncoder(
std::vector<uint8_t>* out, Status* status) {
return std::make_unique<JsonToBinaryEncoder>(out, status);
}
+
+namespace {
+// Below are three parsing routines for CBOR / binary, which cover enough
+// to roundtrip JSON messages.
+Error ParseMap(int32_t stack_depth, span<uint8_t>* bytes,
+ JsonParserHandler* out);
+Error ParseArray(int32_t stack_depth, span<uint8_t>* bytes,
+ JsonParserHandler* out);
+Error ParseValue(int32_t stack_depth, span<uint8_t>* bytes,
+ JsonParserHandler* out);
+
+// Parses one value from the front of |bytes| and reports it to |out|.
+// Returns Error::OK on success; otherwise the returned error says why parsing
+// stopped. |stack_depth| is the current nesting level and is checked against
+// kStackLimit before anything is read.
+Error ParseValue(int32_t stack_depth, span<uint8_t>* bytes,
+                 JsonParserHandler* out) {
+  if (stack_depth > kStackLimit)
+    return Error::BINARY_ENCODING_STACK_LIMIT_EXCEEDED;
+  if (bytes->empty()) return Error::BINARY_ENCODING_UNEXPECTED_EOF;
+  const uint8_t initial_byte = (*bytes)[0];
+
+  // Some values are identified by the complete initial byte, so those are
+  // tried first. The major type (upper bits) is only consulted for the rest.
+  if (initial_byte == kEncodedTrue || initial_byte == kEncodedFalse) {
+    out->HandleBool(initial_byte == kEncodedTrue);
+    *bytes = bytes->subspan(1);
+    return Error::OK;
+  }
+  if (initial_byte == kEncodedNull) {
+    out->HandleNull();
+    *bytes = bytes->subspan(1);
+    return Error::OK;
+  }
+  if (initial_byte == kInitialByteForDouble) {
+    double number;
+    if (!DecodeDouble(bytes, &number))
+      return Error::BINARY_ENCODING_INVALID_DOUBLE;
+    out->HandleDouble(number);
+    return Error::OK;
+  }
+  if (initial_byte == kInitialByteIndefiniteLengthArray)
+    return ParseArray(stack_depth + 1, bytes, out);
+  if (initial_byte == kInitialByteIndefiniteLengthMap)
+    return ParseMap(stack_depth + 1, bytes, out);
+
+  const uint8_t major_type = initial_byte >> kMajorTypeBitShift;
+  if (major_type == static_cast<uint8_t>(MajorType::UNSIGNED) ||
+      major_type == static_cast<uint8_t>(MajorType::NEGATIVE)) {
+    int32_t number;
+    if (!DecodeSigned(bytes, &number))
+      return Error::BINARY_ENCODING_INVALID_SIGNED;
+    out->HandleInt(number);
+    return Error::OK;
+  }
+  if (major_type == static_cast<uint8_t>(MajorType::BYTE_STRING)) {
+    std::vector<uint16_t> utf16;
+    if (!DecodeUTF16String(bytes, &utf16))
+      return Error::BINARY_ENCODING_INVALID_STRING16;
+    out->HandleString(std::move(utf16));
+    return Error::OK;
+  }
+  // Everything else is rejected:
+  // - STRING (utf8) and TAG are still todo.
+  // - ARRAY and MAP are only supported with indefinite length, and
+  //   SIMPLE_VALUE only for true / false / null / double; those cases were
+  //   all handled above.
+  return Error::BINARY_ENCODING_UNSUPPORTED_VALUE;
+}
+
+// Parses an indefinite length array, sending HandleArrayBegin, the events for
+// each element, and HandleArrayEnd to |out|. |bytes| must start with the
+// indefinite length array byte, so basically, ParseArray may only be called
+// after an indefinite length array has been detected. On success, |bytes| is
+// advanced past the stop byte which terminates the array. Returns
+// BINARY_ENCODING_UNEXPECTED_EOF if the input ends before the stop byte, and
+// otherwise passes on the first error from ParseValue.
+Error ParseArray(int32_t stack_depth, span<uint8_t>* bytes,
+                 JsonParserHandler* out) {
+  assert(!bytes->empty());
+  assert((*bytes)[0] == kInitialByteIndefiniteLengthArray);
+
+  if (stack_depth > kStackLimit)
+    return Error::BINARY_ENCODING_STACK_LIMIT_EXCEEDED;
+  *bytes = bytes->subspan(1);
+  out->HandleArrayBegin();
+  while (!bytes->empty()) {
+    // Parse end of array.
+    if ((*bytes)[0] == kStopByte) {
+      // Consume the stop byte. Otherwise the enclosing map / array would
+      // mistake it for its own terminator and end prematurely, dropping
+      // whatever follows this array.
+      *bytes = bytes->subspan(1);
+      out->HandleArrayEnd();
+      return Error::OK;
+    }
+    // Parse value.
+    Error status = ParseValue(stack_depth + 1, bytes, out);
+    if (status != Error::OK) return status;
+  }
+  return Error::BINARY_ENCODING_UNEXPECTED_EOF;
+}
+
+// Parses an indefinite length map, sending HandleObjectBegin, a HandleString
+// event for each key followed by the events for its value, and
+// HandleObjectEnd to |out|. |bytes| must start with the indefinite length map
+// byte, so basically, ParseMap may only be called after an indefinite length
+// map has been detected. On success, |bytes| is advanced past the stop byte
+// which terminates the map. Returns BINARY_ENCODING_INVALID_MAP_KEY if a key
+// can't be decoded as a UTF16 string, BINARY_ENCODING_UNEXPECTED_EOF if the
+// input ends before the stop byte, and otherwise passes on the first error
+// from ParseValue.
+Error ParseMap(int32_t stack_depth, span<uint8_t>* bytes,
+               JsonParserHandler* out) {
+  assert(!bytes->empty());
+  assert((*bytes)[0] == kInitialByteIndefiniteLengthMap);
+
+  if (stack_depth > kStackLimit)
+    return Error::BINARY_ENCODING_STACK_LIMIT_EXCEEDED;
+  *bytes = bytes->subspan(1);
+  out->HandleObjectBegin();
+  while (!bytes->empty()) {
+    // Parse end of map.
+    if ((*bytes)[0] == kStopByte) {
+      // Consume the stop byte. Otherwise the enclosing map / array would
+      // mistake it for its own terminator and end prematurely, dropping
+      // whatever follows this map.
+      *bytes = bytes->subspan(1);
+      out->HandleObjectEnd();
+      return Error::OK;
+    }
+    // Parse key.
+    std::vector<uint16_t> key;
+    if (!DecodeUTF16String(bytes, &key))
+      return Error::BINARY_ENCODING_INVALID_MAP_KEY;
+    out->HandleString(std::move(key));
+    // Parse value.
+    Error status = ParseValue(stack_depth + 1, bytes, out);
+    if (status != Error::OK) return status;
+  }
+  return Error::BINARY_ENCODING_UNEXPECTED_EOF;
+}
+} // namespace
+
+// Entry point for parsing a binary (CBOR) message: |bytes| must hold an
+// indefinite length map, whose contents are reported to |json_out| as JSON
+// events. Any failure is reported through |json_out->HandleError|, together
+// with the number of bytes that had been consumed when parsing stopped.
+void ParseBinary(span<uint8_t> bytes, JsonParserHandler* json_out) {
+  span<uint8_t> remaining = bytes;
+  Error error = Error::OK;
+  if (bytes.empty()) {
+    error = Error::BINARY_ENCODING_UNEXPECTED_EOF;
+  } else if (bytes[0] != kInitialByteIndefiniteLengthMap) {
+    error = Error::BINARY_ENCODING_INDEFINITE_LENGTH_MAP_START_EXPECTED;
+  } else {
+    error = ParseMap(/*stack_depth=*/1, &remaining, json_out);
+  }
+  // For the two checks above nothing has been consumed yet, so the reported
+  // position is 0 in those cases.
+  if (error != Error::OK)
+    json_out->HandleError(Status{error, bytes.size() - remaining.size()});
+}
} // namespace inspector_protocol
diff --git a/encoding/binary_encoding.h b/encoding/binary_encoding.h
index 635c551..ace3a86 100644
--- a/encoding/binary_encoding.h
+++ b/encoding/binary_encoding.h
@@ -28,6 +28,10 @@
// (major type 1) iff < 0.
void EncodeSigned(int32_t value, std::vector<uint8_t>* out);
+// Decodes |value| from |bytes|, if it's encoded as either UNSIGNED
+// or NEGATIVE and within range of int32_t. Otherwise returns false.
+bool DecodeSigned(span<uint8_t>* bytes, int32_t* value);
+
// Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16
// character in |in| is emitted with most significant byte first,
// appending to |out|.
@@ -51,5 +55,11 @@
// |out|. Otherwise, |status.ok()| will be |true|.
std::unique_ptr<JsonParserHandler> NewJsonToBinaryEncoder(
std::vector<uint8_t>* out, Status* status);
+
+// Parses a binary encoded message from |bytes|, sending JSON events to
+// |json_out|. If an error occurs, calls |json_out->HandleError| and stops
+// parsing. The client is responsible for discarding the already received
+// information in that case.
+void ParseBinary(span<uint8_t> bytes, JsonParserHandler* json_out);
} // namespace inspector_protocol
#endif // INSPECTOR_PROTOCOL_ENCODING_BINARY_ENCODING_H_
diff --git a/encoding/binary_encoding_test.cc b/encoding/binary_encoding_test.cc
index 889a3dd..9238808 100644
--- a/encoding/binary_encoding_test.cc
+++ b/encoding/binary_encoding_test.cc
@@ -10,6 +10,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "json_parser.h"
+#include "json_std_string_writer.h"
#include "linux_dev_platform.h"
using testing::ElementsAreArray;
@@ -334,21 +335,23 @@
EncodeUTF16String(span<uint16_t>(utf16.data(), utf16.size()), out);
}
-TEST(JsonToCborConversion, Encoding) {
- // Hits all the cases except error in JsonParserHandler.
- std::string json = R"raw({
- "string": "Hello, \ud83c\udf0e.",
- "double": 3.1415,
- "int": 1,
- "negative int": -1,
- "bool": true,
- "null": null,
- "array": [1,2,3]
- })raw";
- std::vector<uint8_t> out;
+TEST(JsonCborRoundtrip, EncodingDecoding) {
+ // Hits all the cases except error in JsonParserHandler, first parsing
+ // a JSON message into CBOR, then parsing it back from CBOR into JSON.
+ std::string json =
+ "{"
+ "\"string\":\"Hello, \\ud83c\\udf0e.\","
+ "\"double\":3.1415,"
+ "\"int\":1,"
+ "\"negative int\":-1,"
+ "\"bool\":true,"
+ "\"null\":null,"
+ "\"array\":[1,2,3]"
+ "}";
+ std::vector<uint8_t> encoded;
Status status;
std::unique_ptr<JsonParserHandler> encoder =
- NewJsonToBinaryEncoder(&out, &status);
+ NewJsonToBinaryEncoder(&encoded, &status);
span<uint8_t> ascii_in(reinterpret_cast<const uint8_t*>(json.data()),
json.size());
parseJSONChars(GetLinuxDevPlatform(), ascii_in, encoder.get());
@@ -381,6 +384,49 @@
expected.push_back(0xff); // End indef length array
expected.push_back(0xff); // End indef length map
EXPECT_TRUE(status.ok());
- EXPECT_THAT(out, ElementsAreArray(expected));
+ EXPECT_THAT(encoded, ElementsAreArray(expected));
+
+ // And now we roundtrip, decoding the message we just encoded.
+ std::string decoded;
+ std::unique_ptr<JsonParserHandler> json_writer =
+ NewJsonWriter(GetLinuxDevPlatform(), &decoded, &status);
+ ParseBinary(span<uint8_t>(encoded.data(), encoded.size()), json_writer.get());
+ EXPECT_EQ(Error::OK, status.error);
+ EXPECT_EQ(json, decoded);
+}
+
+TEST(ParseBinaryTest, ParseEmptyBinaryMessage) {
+ // Smallest valid message: map start (0xbf) directly followed by stop (0xff).
+ std::vector<uint8_t> in = {0xbf, 0xff};
+ std::string out;
+ Status status;
+ std::unique_ptr<JsonParserHandler> json_writer =
+ NewJsonWriter(GetLinuxDevPlatform(), &out, &status);
+ ParseBinary(span<uint8_t>(in.data(), in.size()), json_writer.get());
+ EXPECT_EQ(Error::OK, status.error);  // Parsing must succeed.
+ EXPECT_EQ("{}", out);  // The empty map is written as an empty JSON object.
+}
+
+TEST(ParseBinaryTest, ParseBinaryHelloWorld) {
+ std::vector<uint8_t> bytes;
+
+ bytes.push_back(0xbf); // start indef length map.
+ EncodeAsciiStringForTest("msg", &bytes); // key: msg
+ // Now write the value, the familiar "Hello, 🌎." where the globe is expressed
+ // as two utf16 chars.
+ bytes.push_back(/*major type=*/2 << 5 | /*additional info=*/20);  // 20 = payload size in bytes
+ for (uint8_t ch : std::array<uint8_t, 20>{
+ {'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0,
+ ',', 0, ' ', 0, 0x3c, 0xd8, 0x0e, 0xdf, '.', 0}})  // two bytes per utf16 char, low byte first
+ bytes.push_back(ch);
+ bytes.push_back(0xff); // stop byte
+
+ std::string out;
+ Status status;
+ std::unique_ptr<JsonParserHandler> json_writer =
+ NewJsonWriter(GetLinuxDevPlatform(), &out, &status);
+ ParseBinary(span<uint8_t>(bytes.data(), bytes.size()), json_writer.get());
+ EXPECT_EQ(Error::OK, status.error);  // Parsing must succeed.
+ EXPECT_EQ("{\"msg\":\"Hello, \\ud83c\\udf0e.\"}", out);  // The globe is written as escaped utf16.
+}
} // namespace inspector_protocol
diff --git a/encoding/status.h b/encoding/status.h
index 9b15d44..0925743 100644
--- a/encoding/status.h
+++ b/encoding/status.h
@@ -13,19 +13,29 @@
enum class Error {
OK = 0,
// JSON parsing errors - json_parser.{h,cc}.
- JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 1,
- JSON_PARSER_STACK_LIMIT_EXCEEDED = 2,
- JSON_PARSER_NO_INPUT = 3,
- JSON_PARSER_INVALID_TOKEN = 4,
- JSON_PARSER_INVALID_NUMBER = 5,
- JSON_PARSER_INVALID_STRING = 6,
- JSON_PARSER_UNEXPECTED_ARRAY_END = 7,
- JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 8,
- JSON_PARSER_STRING_LITERAL_EXPECTED = 9,
- JSON_PARSER_COLON_EXPECTED = 10,
- JSON_PARSER_UNEXPECTED_OBJECT_END = 11,
- JSON_PARSER_COMMA_OR_OBJECT_END_EXPECTED = 12,
- JSON_PARSER_VALUE_EXPECTED = 13,
+ JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01,
+ JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02,
+ JSON_PARSER_NO_INPUT = 0x03,
+ JSON_PARSER_INVALID_TOKEN = 0x04,
+ JSON_PARSER_INVALID_NUMBER = 0x05,
+ JSON_PARSER_INVALID_STRING = 0x06,
+ JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07,
+ JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08,
+ JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09,
+ JSON_PARSER_COLON_EXPECTED = 0x0a,
+ JSON_PARSER_UNEXPECTED_OBJECT_END = 0x0b,
+ JSON_PARSER_COMMA_OR_OBJECT_END_EXPECTED = 0x0c,
+ JSON_PARSER_VALUE_EXPECTED = 0x0d,
+
+ BINARY_ENCODING_UNEXPECTED_EOF = 0x0e,
+ BINARY_ENCODING_INDEFINITE_LENGTH_MAP_START_EXPECTED = 0x0f,
+ BINARY_ENCODING_KEY_OR_STOP_BYTE_EXPECTED = 0x10,
+ BINARY_ENCODING_INVALID_MAP_KEY = 0x11,
+ BINARY_ENCODING_STACK_LIMIT_EXCEEDED = 0x12,
+ BINARY_ENCODING_UNSUPPORTED_VALUE = 0x13,
+ BINARY_ENCODING_INVALID_STRING16 = 0x14,
+ BINARY_ENCODING_INVALID_DOUBLE = 0x15,
+ BINARY_ENCODING_INVALID_SIGNED = 0x16,
};
// A status value with position that can be copied. The default status