blob: 8f4f9a4036b0b7f8a7a91106a017888213597b8c [file] [log] [blame]
// Copyright 2020 The LUCI Authors.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
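// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and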
// limitations under the License.
package datastore
import (
// protoOption specifies how to handle a field implementing proto.Message.
//
// **Modern format**: the first byte is reserved for denoting the format, which
// makes it incompatible with the legacy format. Options currently supported:
//
//	"" (default) - writes like "nocompress", unless the serialized proto is
//	               larger than compressionThreshold bytes, in which case it
//	               writes like "zstd". Reads both modern encodings.
//	"nocompress" - no compression.
//	               Able to read compressed items in modern format.
//	"zstd"       - compress serialized proto with zstd encoding.
//	               Able to read non-compressed items in modern format.
//
// **Legacy formats**: not compatible with each other or with modern formats.
// Options supported:
//
//	"legacy"     - reads/writes the bare serialized proto message. Useful for
//	               migrating off the `proto-gae` tool.
type protoOption string
const (
	// Non-legacy proto serialization first writes a varint identifying its
	// kind. To avoid accidental overlap with the legacy protobuf encoding, and
	// to ensure that proto unmarshaling errors out on it, each kind uses
	//
	//	number := (N<<3) | 4
	//
	// Explanation:
	//
	// Proto serialization also starts with a varint on the wire: the so-called
	// "tag", which is composed of a field number and a wire type (see [1]):
	//
	//	tag := (field_number<<3) | wire_type
	//
	// There are two long-deprecated wire types which aren't even supported by
	// most languages (see [2]); one of them is "group end", which has a value
	// of 4. A group end specifically shouldn't appear at the beginning of a
	// message (deliberate trickery such as this notwithstanding). Therefore,
	// for any field number N, a leading value of `(N<<3) | 4` makes an
	// incorrect proto decoding error out pretty quickly.
	//
	// [1] Protocol Buffers documentation, "Encoding" guide (message structure).
	// [2] Protocol Buffers documentation, "Encoding" guide (deprecated groups).
	//
	// WARNING: changing these values is not backwards compatible.

	// protoBinOptNoCompress marks a modern-format blob holding an
	// uncompressed serialized proto.
	protoBinOptNoCompress = (1 << 3) | 4
	// protoBinOptZSTD marks a modern-format blob holding a zstd-compressed
	// serialized proto.
	protoBinOptZSTD = (2 << 3) | 4
)
// compressionThreshold is the size, in bytes, of a serialized proto value
// above which the default option compresses the value (16 KiB).
const compressionThreshold = 16 << 10

// errInvalidProtoPrefix is returned when a modern-format blob does not start
// with a recognized one-byte format marker.
var errInvalidProtoPrefix = fmt.Errorf("invalid gae proto serialization or unrecognized compression scheme")
func protoToProperty(pb proto.Message, opt protoOption) (prop Property, err error) {
// proto can't marshall to io.Writer, so might as well serialize it now,
// but leave first byte free for "nocompress" case.
blob := make([]byte, 1, 16)
if blob, err = (proto.MarshalOptions{}).MarshalAppend(blob, pb); err != nil {
pbblob := blob[1:]
if opt == "" /*default*/ {
opt = "nocompress"
if len(pbblob) > compressionThreshold {
opt = "zstd"
switch opt {
case "legacy":
prop = MkPropertyNI(pbblob)
case "nocompress":
write1ByteProtoOpt(blob, protoBinOptNoCompress)
prop = MkPropertyNI(blob)
case "zstd":
// allocate new buffer for compressed data, hoping for ~2x compression.
blob = make([]byte, 1, len(pbblob)/2)
write1ByteProtoOpt(blob, protoBinOptZSTD)
blob = zstd.EncodeAll(pbblob, blob)
prop = MkPropertyNI(blob)
panic(fmt.Errorf("unrecognized proto option: %q", opt))
func protoFromProperty(field reflect.Value, prop Property, opt protoOption) error {
// Python can write NULL properties for empty blobs. Treat them as missing.
if prop.Type() == PTNull {
return nil
pm, _ := field.Interface().(proto.Message)
data, err := prop.Project(PTBytes)
if err != nil {
return err
blob := data.([]byte)
pm = pm.ProtoReflect().New().Interface()
switch opt {
case "legacy":
break // read entire blob.
case "zstd", "", "nocompress":
switch binOpt, readBytes := binary.Uvarint(blob); {
case readBytes != 1:
return errInvalidProtoPrefix
case protoBinOptNoCompress == binOpt:
blob = blob[1:]
case protoBinOptZSTD == binOpt:
if blob, err = zstd.DecodeAll(blob[1:], nil); err != nil {
return err
return errInvalidProtoPrefix
panic(fmt.Errorf("unrecognized proto option: %q", opt))
if err = proto.Unmarshal(blob, pm); err != nil {
return err
return nil
// write1ByteProtoOpt stores opt, encoded as a varint, into b[0].
//
// It panics if opt does not encode to exactly one byte. The value is encoded
// into a scratch buffer first, so that this check fires even when b has room
// for only one byte; encoding straight into such a buffer would die with an
// index-out-of-range runtime panic before the check could run.
//
// b must have a length of at least 1.
func write1ByteProtoOpt(b []byte, opt uint64) {
	var scratch [binary.MaxVarintLen64]byte
	if n := binary.PutUvarint(scratch[:], opt); n != 1 {
		panic(fmt.Errorf("protoOption longer than 1 byte: %d", n))
	}
	b[0] = scratch[0]
}