Add a binary parser which sends events to a JsonParserHandler.

In the unittest, roundtrip from JSON to CBOR and back.

Change-Id: I6b53bafbcf52b8d7eeaf57c383f11343609a92c3
diff --git a/BUILD.gn b/BUILD.gn
index 2c07206..c7e77c4 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -57,6 +57,7 @@
   ]
   deps = [
     ":binary_encoding",
+    ":json_std_string_writer",
     ":linux_dev_platform",
     "//testing:gtest_main",
     "//third_party/gtest:gmock",
diff --git a/encoding/binary_encoding.cc b/encoding/binary_encoding.cc
index 74619ee..24a9149 100644
--- a/encoding/binary_encoding.cc
+++ b/encoding/binary_encoding.cc
@@ -69,6 +69,10 @@
 static constexpr uint8_t kStopByte =
     EncodeInitialByte(MajorType::SIMPLE_VALUE, 31);
 
+// When parsing binary (CBOR), we limit the recursion depth to this constant.
+// Each nested object or array consumes two levels (container and value).
+static constexpr int kStackLimit = 1000;
+
 // Writes the bytes for |v| to |out|, starting with the most significant byte.
 // See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
 template <typename T>
@@ -227,6 +231,32 @@
     internal::EncodeNegative(value, out);
 }
 
+bool DecodeSigned(span<uint8_t>* bytes, int32_t* value) {
+  MajorType type;
+  span<uint8_t> internal_bytes = *bytes;
+  uint64_t encoded_value;
+  if (!ReadItemStart(&internal_bytes, &type, &encoded_value)) return false;
+  // It's unfortunate that we're rejecting perfectly fine CBOR encoded UNSIGNED
+  // / NEGATIVE values here if they're outside the range of int32_t. This is
+  // (for now) for compatibility with JSON, or more specifically with what our
+  // parser supports via JsonParserHandler::HandleInt.
+  // A NEGATIVE item encodes -1 - encoded_value, so for both major types the
+  // result fits into int32_t iff encoded_value <= INT32_MAX. Checking the
+  // unsigned payload first avoids wrapping around (and thereby wrongly
+  // accepting) huge payloads when converting them to a signed type.
+  if (encoded_value > std::numeric_limits<int32_t>::max()) return false;
+  const int32_t magnitude = static_cast<int32_t>(encoded_value);
+  if (type == MajorType::UNSIGNED) {
+    *value = magnitude;
+  } else if (type == MajorType::NEGATIVE) {
+    *value = -magnitude - 1;  // At most -INT32_MAX - 1, so no overflow.
+  } else {
+    return false;  // Not an integer item; |bytes| is left untouched.
+  }
+  *bytes = internal_bytes;  // Only advance the caller's span on success.
+  return true;
+}
+
 void EncodeUTF16String(span<uint16_t> in, std::vector<uint8_t>* out) {
   WriteItemStart(MajorType::BYTE_STRING, static_cast<uint64_t>(in.size_bytes()),
                  out);
@@ -296,6 +326,7 @@
   return true;
 }
 
+namespace {
 class JsonToBinaryEncoder : public JsonParserHandler {
  public:
   JsonToBinaryEncoder(std::vector<uint8_t>* out, Status* status)
@@ -343,9 +374,153 @@
   std::vector<uint8_t>* out_;
   Status* status_;
 };
+}  // namespace
 
 std::unique_ptr<JsonParserHandler> NewJsonToBinaryEncoder(
     std::vector<uint8_t>* out, Status* status) {
   return std::make_unique<JsonToBinaryEncoder>(out, status);
 }
+
+namespace {
+// Below are three parsing routines for CBOR / binary, which cover enough
+// to roundtrip JSON messages.
+Error ParseMap(int32_t stack_depth, span<uint8_t>* bytes,
+               JsonParserHandler* out);
+Error ParseArray(int32_t stack_depth, span<uint8_t>* bytes,
+                 JsonParserHandler* out);
+Error ParseValue(int32_t stack_depth, span<uint8_t>* bytes,
+                 JsonParserHandler* out);
+
+Error ParseValue(int32_t stack_depth, span<uint8_t>* bytes,
+                 JsonParserHandler* out) {
+  if (stack_depth > kStackLimit)
+    return Error::BINARY_ENCODING_STACK_LIMIT_EXCEEDED;
+  if (bytes->empty()) return Error::BINARY_ENCODING_UNEXPECTED_EOF;
+  // Parses one value from the front of |bytes|. First we dispatch on the
+  // entire initial byte; only when this matches nothing do we dispatch on
+  // the major type (first three bits) for a few more choices below.
+  switch ((*bytes)[0]) {
+    case kEncodedTrue:
+      out->HandleBool(true);
+      *bytes = bytes->subspan(1);  // Consume the one-byte item.
+      return Error::OK;
+    case kEncodedFalse:
+      out->HandleBool(false);
+      *bytes = bytes->subspan(1);  // Consume the one-byte item.
+      return Error::OK;
+    case kEncodedNull:
+      out->HandleNull();
+      *bytes = bytes->subspan(1);  // Consume the one-byte item.
+      return Error::OK;
+    case kInitialByteForDouble: {
+      double value;
+      if (!DecodeDouble(bytes, &value))
+        return Error::BINARY_ENCODING_INVALID_DOUBLE;
+      out->HandleDouble(value);
+      return Error::OK;
+    }
+    case kInitialByteIndefiniteLengthArray:
+      return ParseArray(stack_depth + 1, bytes, out);  // Container level.
+    case kInitialByteIndefiniteLengthMap:
+      return ParseMap(stack_depth + 1, bytes, out);  // Container level.
+    default:
+      break;  // Continue with the major type dispatch below.
+  }
+  switch ((*bytes)[0] >> kMajorTypeBitShift) {
+    case uint8_t(MajorType::UNSIGNED):
+    case uint8_t(MajorType::NEGATIVE): {
+      int32_t value;
+      if (!DecodeSigned(bytes, &value))
+        return Error::BINARY_ENCODING_INVALID_SIGNED;
+      out->HandleInt(value);
+      return Error::OK;
+    }
+    case uint8_t(MajorType::BYTE_STRING): {
+      std::vector<uint16_t> value;
+      if (!DecodeUTF16String(bytes, &value))
+        return Error::BINARY_ENCODING_INVALID_STRING16;
+      out->HandleString(std::move(value));
+      return Error::OK;
+    }
+    case uint8_t(MajorType::STRING):        // utf8, todo
+    case uint8_t(MajorType::ARRAY):         // indef length case handled above
+    case uint8_t(MajorType::MAP):           // indef length case handled above
+    case uint8_t(MajorType::TAG):           // todo
+    case uint8_t(MajorType::SIMPLE_VALUE):  // supported cases handled above
+    default:
+      return Error::BINARY_ENCODING_UNSUPPORTED_VALUE;
+  }
+}
+
+// Parses an indefinite length array, sending its events to |out|. |bytes|
+// must start with the indefinite length array byte; on success it is advanced
+// past the array's stop byte, so that the caller can continue after the array.
+Error ParseArray(int32_t stack_depth, span<uint8_t>* bytes,
+                 JsonParserHandler* out) {
+  assert(!bytes->empty());
+  assert((*bytes)[0] == kInitialByteIndefiniteLengthArray);
+
+  if (stack_depth > kStackLimit)
+    return Error::BINARY_ENCODING_STACK_LIMIT_EXCEEDED;
+  *bytes = bytes->subspan(1);
+  out->HandleArrayBegin();
+  while (!bytes->empty()) {
+    if ((*bytes)[0] == kStopByte) {
+      *bytes = bytes->subspan(1);  // Consume the stop byte ending the array.
+      out->HandleArrayEnd();
+      return Error::OK;
+    }
+    // Parse value.
+    Error status = ParseValue(stack_depth + 1, bytes, out);
+    if (status != Error::OK) return status;
+  }
+  return Error::BINARY_ENCODING_UNEXPECTED_EOF;  // Stop byte is missing.
+}
+
+// Parses an indefinite length map, sending its events to |out|. |bytes| must
+// start with the indefinite length map byte; on success it is advanced past
+// the map's stop byte, so that the caller can continue after the map.
+Error ParseMap(int32_t stack_depth, span<uint8_t>* bytes,
+               JsonParserHandler* out) {
+  assert(!bytes->empty());
+  assert((*bytes)[0] == kInitialByteIndefiniteLengthMap);
+
+  if (stack_depth > kStackLimit)
+    return Error::BINARY_ENCODING_STACK_LIMIT_EXCEEDED;
+  *bytes = bytes->subspan(1);
+  out->HandleObjectBegin();
+  while (!bytes->empty()) {
+    if ((*bytes)[0] == kStopByte) {
+      *bytes = bytes->subspan(1);  // Consume the stop byte ending the map.
+      out->HandleObjectEnd();
+      return Error::OK;
+    }
+    // Parse key.
+    std::vector<uint16_t> key;
+    if (!DecodeUTF16String(bytes, &key))
+      return Error::BINARY_ENCODING_INVALID_MAP_KEY;
+    out->HandleString(std::move(key));
+    // Parse value.
+    Error status = ParseValue(stack_depth + 1, bytes, out);
+    if (status != Error::OK) return status;
+  }
+  return Error::BINARY_ENCODING_UNEXPECTED_EOF;  // Stop byte is missing.
+}
+}  // namespace
+
+void ParseBinary(span<uint8_t> bytes, JsonParserHandler* json_out) {
+  if (bytes.empty()) {  // Nothing to parse at all.
+    json_out->HandleError(Status{Error::BINARY_ENCODING_UNEXPECTED_EOF, 0});
+    return;
+  }
+  if (bytes[0] != kInitialByteIndefiniteLengthMap) {  // Must be a map.
+    json_out->HandleError(
+        Status{Error::BINARY_ENCODING_INDEFINITE_LENGTH_MAP_START_EXPECTED, 0});
+    return;
+  }
+  span<uint8_t> internal_bytes = bytes;  // Cursor; advanced while parsing.
+  Error error = ParseMap(/*stack_depth=*/1, &internal_bytes, json_out);
+  if (error == Error::OK) return;  // Else: report offset of unconsumed rest.
+  json_out->HandleError(Status{error, bytes.size() - internal_bytes.size()});
+}
 }  // namespace inspector_protocol
diff --git a/encoding/binary_encoding.h b/encoding/binary_encoding.h
index 635c551..ace3a86 100644
--- a/encoding/binary_encoding.h
+++ b/encoding/binary_encoding.h
@@ -28,6 +28,10 @@
 // (major type 1) iff < 0.
 void EncodeSigned(int32_t value, std::vector<uint8_t>* out);
 
+// Decodes |value| from |bytes|, if it's encoded as either UNSIGNED
+// or NEGATIVE and within range of int32_t. Otherwise returns false.
+bool DecodeSigned(span<uint8_t>* bytes, int32_t* value);
+
 // Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16
 // character in |in| is emitted with most significant byte first,
 // appending to |out|.
@@ -51,5 +55,11 @@
 // |out|. Otherwise, |status.ok()| will be |true|.
 std::unique_ptr<JsonParserHandler> NewJsonToBinaryEncoder(
     std::vector<uint8_t>* out, Status* status);
+
+// Parses a binary encoded message from |bytes|, sending JSON events to
+// |json_out|. If an error occurs, calls |json_out->HandleError|, and parsing
+// stops. The client is responsible for discarding the already received
+// information in that case.
+void ParseBinary(span<uint8_t> bytes, JsonParserHandler* json_out);
 }  // namespace inspector_protocol
 #endif  // INSPECTOR_PROTOCOL_ENCODING_BINARY_ENCODING_H_
diff --git a/encoding/binary_encoding_test.cc b/encoding/binary_encoding_test.cc
index 889a3dd..9238808 100644
--- a/encoding/binary_encoding_test.cc
+++ b/encoding/binary_encoding_test.cc
@@ -10,6 +10,7 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include "json_parser.h"
+#include "json_std_string_writer.h"
 #include "linux_dev_platform.h"
 
 using testing::ElementsAreArray;
@@ -334,21 +335,23 @@
   EncodeUTF16String(span<uint16_t>(utf16.data(), utf16.size()), out);
 }
 
-TEST(JsonToCborConversion, Encoding) {
-  // Hits all the cases except error in JsonParserHandler.
-  std::string json = R"raw({
-     "string": "Hello, \ud83c\udf0e.",
-     "double": 3.1415,
-     "int": 1,
-     "negative int": -1,
-     "bool": true,
-     "null": null,
-     "array": [1,2,3]
-  })raw";
-  std::vector<uint8_t> out;
+TEST(JsonCborRoundtrip, EncodingDecoding) {
+  // Hits all the cases except error in JsonParserHandler, first parsing
+  // a JSON message into CBOR, then parsing it back from CBOR into JSON.
+  std::string json =
+      "{"
+      "\"string\":\"Hello, \\ud83c\\udf0e.\","
+      "\"double\":3.1415,"
+      "\"int\":1,"
+      "\"negative int\":-1,"
+      "\"bool\":true,"
+      "\"null\":null,"
+      "\"array\":[1,2,3]"
+      "}";
+  std::vector<uint8_t> encoded;
   Status status;
   std::unique_ptr<JsonParserHandler> encoder =
-      NewJsonToBinaryEncoder(&out, &status);
+      NewJsonToBinaryEncoder(&encoded, &status);
   span<uint8_t> ascii_in(reinterpret_cast<const uint8_t*>(json.data()),
                          json.size());
   parseJSONChars(GetLinuxDevPlatform(), ascii_in, encoder.get());
@@ -381,6 +384,49 @@
   expected.push_back(0xff);  // End indef length array
   expected.push_back(0xff);  // End indef length map
   EXPECT_TRUE(status.ok());
-  EXPECT_THAT(out, ElementsAreArray(expected));
+  EXPECT_THAT(encoded, ElementsAreArray(expected));
+
+  // And now we roundtrip, decoding the message we just encoded.
+  std::string decoded;
+  std::unique_ptr<JsonParserHandler> json_writer =
+      NewJsonWriter(GetLinuxDevPlatform(), &decoded, &status);
+  ParseBinary(span<uint8_t>(encoded.data(), encoded.size()), json_writer.get());
+  EXPECT_EQ(Error::OK, status.error);
+  EXPECT_EQ(json, decoded);
+}
+
+TEST(ParseBinaryTest, ParseEmptyBinaryMessage) {
+  // Just an indefinite length map that's empty (0xff = stop byte).
+  std::vector<uint8_t> in = {0xbf, 0xff};
+  std::string out;
+  Status status;
+  std::unique_ptr<JsonParserHandler> json_writer =
+      NewJsonWriter(GetLinuxDevPlatform(), &out, &status);
+  ParseBinary(span<uint8_t>(in.data(), in.size()), json_writer.get());
+  EXPECT_EQ(Error::OK, status.error);
+  EXPECT_EQ("{}", out);
+}
+
+TEST(ParseBinaryTest, ParseBinaryHelloWorld) {
+  std::vector<uint8_t> bytes;
+
+  bytes.push_back(0xbf);                    // start indef length map.
+  EncodeAsciiStringForTest("msg", &bytes);  // key: msg
+  // Now write the value, the familiar "Hello, 🌎." where the globe is expressed
+  // as two utf16 chars.
+  bytes.push_back(/*major type=*/2 << 5 | /*additional info=*/20);
+  for (uint8_t ch : std::array<uint8_t, 20>{
+           {'H', 0, 'e', 0, 'l',  0,    'l',  0,    'o', 0,
+            ',', 0, ' ', 0, 0x3c, 0xd8, 0x0e, 0xdf, '.', 0}})
+    bytes.push_back(ch);
+  bytes.push_back(0xff);  // stop byte
+
+  std::string out;
+  Status status;
+  std::unique_ptr<JsonParserHandler> json_writer =
+      NewJsonWriter(GetLinuxDevPlatform(), &out, &status);
+  ParseBinary(span<uint8_t>(bytes.data(), bytes.size()), json_writer.get());
+  EXPECT_EQ(Error::OK, status.error);
+  EXPECT_EQ("{\"msg\":\"Hello, \\ud83c\\udf0e.\"}", out);
 }
 }  // namespace inspector_protocol
diff --git a/encoding/status.h b/encoding/status.h
index 9b15d44..0925743 100644
--- a/encoding/status.h
+++ b/encoding/status.h
@@ -13,19 +13,29 @@
 enum class Error {
   OK = 0,
   // JSON parsing errors - json_parser.{h,cc}.
-  JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 1,
-  JSON_PARSER_STACK_LIMIT_EXCEEDED = 2,
-  JSON_PARSER_NO_INPUT = 3,
-  JSON_PARSER_INVALID_TOKEN = 4,
-  JSON_PARSER_INVALID_NUMBER = 5,
-  JSON_PARSER_INVALID_STRING = 6,
-  JSON_PARSER_UNEXPECTED_ARRAY_END = 7,
-  JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 8,
-  JSON_PARSER_STRING_LITERAL_EXPECTED = 9,
-  JSON_PARSER_COLON_EXPECTED = 10,
-  JSON_PARSER_UNEXPECTED_OBJECT_END = 11,
-  JSON_PARSER_COMMA_OR_OBJECT_END_EXPECTED = 12,
-  JSON_PARSER_VALUE_EXPECTED = 13,
+  JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01,
+  JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02,
+  JSON_PARSER_NO_INPUT = 0x03,
+  JSON_PARSER_INVALID_TOKEN = 0x04,
+  JSON_PARSER_INVALID_NUMBER = 0x05,
+  JSON_PARSER_INVALID_STRING = 0x06,
+  JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07,
+  JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08,
+  JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09,
+  JSON_PARSER_COLON_EXPECTED = 0x0a,
+  JSON_PARSER_UNEXPECTED_OBJECT_END = 0x0b,
+  JSON_PARSER_COMMA_OR_OBJECT_END_EXPECTED = 0x0c,
+  JSON_PARSER_VALUE_EXPECTED = 0x0d,
+
+  BINARY_ENCODING_UNEXPECTED_EOF = 0x0e,
+  BINARY_ENCODING_INDEFINITE_LENGTH_MAP_START_EXPECTED = 0x0f,
+  BINARY_ENCODING_KEY_OR_STOP_BYTE_EXPECTED = 0x10,
+  BINARY_ENCODING_INVALID_MAP_KEY = 0x11,
+  BINARY_ENCODING_STACK_LIMIT_EXCEEDED = 0x12,
+  BINARY_ENCODING_UNSUPPORTED_VALUE = 0x13,
+  BINARY_ENCODING_INVALID_STRING16 = 0x14,
+  BINARY_ENCODING_INVALID_DOUBLE = 0x15,
+  BINARY_ENCODING_INVALID_SIGNED = 0x16,
 };
 
 // A status value with position that can be copied. The default status