// Copyright 2015 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datastore

import (
	"bytes"
	"fmt"
	"sort"
	"time"

	"go.chromium.org/luci/common/data/cmpbin"
	"go.chromium.org/luci/common/data/stringset"
)
// WritePropertyMapDeterministic allows tests to make Serializer.PropertyMap
// deterministic.
//
// This should be set once at the top of your tests in an init() function.
var WritePropertyMapDeterministic = false
// Serializer allows writing binary-encoded datastore types (like Properties,
// Keys, etc.)
//
// See the `Serialize` and `SerializeKC` variables for common shortcuts.
type Serializer struct {
// WithKeyContext controls whether bytes written with this Serializer include
// the Key's appid and namespace.
//
// Frequently the appid and namespace of keys are known in advance and so
// there's no reason to redundantly encode them.
WithKeyContext bool
}
var (
// Serialize is a Serializer{WithKeyContext:false}, useful for inline
// invocations like:
//
// datastore.Serialize.Time(...)
Serialize Serializer
// SerializeKC is a Serializer{WithKeyContext:true}, useful for inline
// invocations like:
//
// datastore.SerializeKC.Key(...)
SerializeKC = Serializer{true}
)
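
// exampleKeyContext is an illustrative sketch (not part of the original API)
// of the difference between the two Serializers above: only SerializeKC
// prefixes the encoded key with its appid and namespace, so it is the one to
// use when those are not implied by the surrounding context. Assumes `k` is a
// valid, non-nil key.
func exampleKeyContext(k *Key) (bare, withContext []byte, err error) {
	bufA, bufB := &bytes.Buffer{}, &bytes.Buffer{}
	if err = Serialize.Key(bufA, k); err != nil {
		return
	}
	if err = SerializeKC.Key(bufB, k); err != nil {
		return
	}
	return bufA.Bytes(), bufB.Bytes(), nil
}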
// Key encodes a key to the buffer. If s.WithKeyContext is true, then this
// encoded value will include the appid and namespace of the key.
func (s Serializer) Key(buf cmpbin.WriteableBytesBuffer, k *Key) (err error) {
// [appid ++ namespace]? ++ [1 ++ token]* ++ NULL
defer recoverTo(&err)
appid, namespace, toks := k.Split()
if s.WithKeyContext {
panicIf(buf.WriteByte(1))
_, e := cmpbin.WriteString(buf, appid)
panicIf(e)
_, e = cmpbin.WriteString(buf, namespace)
panicIf(e)
} else {
panicIf(buf.WriteByte(0))
}
for _, tok := range toks {
panicIf(buf.WriteByte(1))
panicIf(s.KeyTok(buf, tok))
}
return buf.WriteByte(0)
}
// KeyTok writes a KeyTok to the buffer. You usually want Key instead of this.
func (s Serializer) KeyTok(buf cmpbin.WriteableBytesBuffer, tok KeyTok) (err error) {
// tok.kind ++ typ ++ [tok.stringID || tok.intID]
defer recoverTo(&err)
_, e := cmpbin.WriteString(buf, tok.Kind)
panicIf(e)
if tok.StringID != "" {
panicIf(buf.WriteByte(byte(PTString)))
_, e := cmpbin.WriteString(buf, tok.StringID)
panicIf(e)
} else {
panicIf(buf.WriteByte(byte(PTInt)))
_, e := cmpbin.WriteInt(buf, tok.IntID)
panicIf(e)
}
return nil
}
// GeoPoint writes a GeoPoint to the buffer.
func (s Serializer) GeoPoint(buf cmpbin.WriteableBytesBuffer, gp GeoPoint) (err error) {
defer recoverTo(&err)
_, e := cmpbin.WriteFloat64(buf, gp.Lat)
panicIf(e)
_, e = cmpbin.WriteFloat64(buf, gp.Lng)
return e
}
// Time writes a time.Time to the buffer.
//
// The supplied time is rounded via datastore.RoundTime and written as a
// microseconds-since-epoch integer to conform to datastore storage standards.
func (s Serializer) Time(buf cmpbin.WriteableBytesBuffer, t time.Time) error {
name, off := t.Zone()
if name != "UTC" || off != 0 {
panic(fmt.Errorf("helper: UTC OR DEATH: %s", t))
}
_, err := cmpbin.WriteInt(buf, TimeToInt(t))
return err
}
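
// exampleSerializeTime is an illustrative sketch (not part of the original
// API): Time panics unless the value is already in UTC, so callers holding a
// local time would normalize it first.
func exampleSerializeTime(buf cmpbin.WriteableBytesBuffer, t time.Time) error {
	// Normalize to UTC before serializing; rounding to microseconds is
	// handled by Time itself.
	return Serialize.Time(buf, t.UTC())
}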
// Property writes a Property to the buffer. The Serializer's WithKeyContext
// setting behaves the same way that it does for Key, but only has an effect
// if `p` contains a Key as its value.
func (s Serializer) Property(buf cmpbin.WriteableBytesBuffer, p Property) error {
return s.propertyImpl(buf, &p, false)
}
// IndexedProperty writes a Property to the buffer as its native index type.
// The Serializer's WithKeyContext setting behaves the same way that it does
// for Key, but only has an effect if `p` contains a Key as its value.
func (s Serializer) IndexedProperty(buf cmpbin.WriteableBytesBuffer, p Property) error {
return s.propertyImpl(buf, &p, true)
}
// propertyImpl is an implementation of Property and IndexedProperty.
func (s Serializer) propertyImpl(buf cmpbin.WriteableBytesBuffer, p *Property, index bool) (err error) {
defer recoverTo(&err)
var it PropertyType
var v any
if !p.Type().Comparable() {
// Non-comparable types are stored as is and can't be used in indexes.
it, v = p.Type(), p.Value()
if index {
return fmt.Errorf("serializing uncomparable type %s as indexed property", p.Type())
}
} else {
// For comparable indexable types do type coercion to an indexed value. Note
// that this means that e.g. PTTime is *always* stored as int64. If `index`
// is true (meaning we are going to compare the final byte blob to other
// byte blobs in the index), the property is also tagged by PTInt type
// (instead of its native PTTime). That way all properties represented by
// e.g. int64 are comparable to one another as raw byte blobs, regardless of
// their "public" type.
it, v = p.IndexTypeAndValue()
if !index {
it = p.Type()
}
}
// Note: non-comparable properties still can have "indexed" flag set. It is
// just interpreted differently (in PTPropertyMap case it means to index
// the inner properties).
typb := byte(it)
if p.IndexSetting() != NoIndex {
typb |= 0x80
}
panicIf(buf.WriteByte(typb))
err = s.rawPropValue(buf, v)
return
}
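
// exampleIndexCoercion is an illustrative sketch (not part of the original
// API) of the coercion described above: IndexedProperty tags a time-valued
// property as PTInt and writes its int64 index value, while Property keeps
// the native PTTime tag, so only the indexed form is byte-comparable with
// plain integer properties.
func exampleIndexCoercion(t time.Time) (native, indexed []byte, err error) {
	p := MkProperty(t.UTC())
	bufA, bufB := &bytes.Buffer{}, &bytes.Buffer{}
	if err = Serialize.Property(bufA, p); err != nil {
		return
	}
	if err = Serialize.IndexedProperty(bufB, p); err != nil {
		return
	}
	return bufA.Bytes(), bufB.Bytes(), nil
}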
// rawPropValue writes the value of v to buf.
//
// `v` is either a type produced by IndexTypeAndValue() or some uncomparable
// type (e.g. PropertyMap).
func (s Serializer) rawPropValue(buf cmpbin.WriteableBytesBuffer, v any) (err error) {
switch t := v.(type) {
case nil:
case bool:
b := byte(0)
if t {
b = 1
}
err = buf.WriteByte(b)
case int64:
_, err = cmpbin.WriteInt(buf, t)
case float64:
_, err = cmpbin.WriteFloat64(buf, t)
case string:
_, err = cmpbin.WriteString(buf, t)
case []byte:
_, err = cmpbin.WriteBytes(buf, t)
case GeoPoint:
err = s.GeoPoint(buf, t)
case PropertyMap:
// This is a property map representing a nested entity. Cloud Datastore can
// store their keys, but only if they are non-partial. Do the same here. If
// `t` has `$key`, its key context will be used, otherwise there will be
// no key context at all (it will be populated when deserializing).
key, _ := (KeyContext{}).NewKeyFromMeta(t)
if key != nil && key.IsIncomplete() {
key = nil
}
err = s.propertyMap(buf, t, key)
case *Key:
err = s.Key(buf, t)
default:
err = fmt.Errorf("unsupported type: %T", t)
}
return
}
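
// exampleNestedEntity is an illustrative sketch (not part of the original
// API) of the PTPropertyMap case above: a nested entity is a PropertyMap
// whose "$key" meta property, if present and complete, is serialized along
// with its fields. Assumes MkProperty accepts a PropertyMap value; the
// "inner" field name is arbitrary, and `fields` is mutated.
func exampleNestedEntity(k *Key, fields PropertyMap) []byte {
	fields["$key"] = MkPropertyNI(k) // incomplete keys would be dropped
	return Serialize.ToBytes(PropertyMap{"inner": MkProperty(fields)})
}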
// PropertyMap writes an entire PropertyMap to the buffer. The Serializer's
// WithKeyContext setting behaves the same way that it does for Key.
//
// If WritePropertyMapDeterministic is true, then the rows will be sorted by
// property name before they're serialized to buf (mostly useful for testing,
// but also potentially useful if you need to make a hash of the property data).
func (s Serializer) PropertyMap(buf cmpbin.WriteableBytesBuffer, pm PropertyMap) error {
return s.propertyMap(buf, pm, nil)
}
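
// examplePropertyMapEquality is an illustrative sketch (not part of the
// original API): with WritePropertyMapDeterministic enabled (normally done
// once in a test's init(), per its comment above), PropertyMaps with equal
// contents serialize to identical byte strings, which is what makes the
// encoding usable for comparison or hashing.
func examplePropertyMapEquality(a, b PropertyMap) (bool, error) {
	WritePropertyMapDeterministic = true
	bufA, bufB := &bytes.Buffer{}, &bytes.Buffer{}
	if err := Serialize.PropertyMap(bufA, a); err != nil {
		return false, err
	}
	if err := Serialize.PropertyMap(bufB, b); err != nil {
		return false, err
	}
	return bytes.Equal(bufA.Bytes(), bufB.Bytes()), nil
}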
// propertyMap writes an entire PropertyMap to the buffer.
//
// If `key` is given, it will be serialized as a `$key` meta property. All other
// meta properties are skipped.
func (s Serializer) propertyMap(buf cmpbin.WriteableBytesBuffer, pm PropertyMap, key *Key) (err error) {
defer recoverTo(&err)
rows := make(sort.StringSlice, 0, len(pm)+1)
tmpBuf := &bytes.Buffer{}
writeProp := func(name string, pdata PropertyData) {
tmpBuf.Reset()
_, e := cmpbin.WriteString(tmpBuf, name)
panicIf(e)
switch t := pdata.(type) {
case Property:
_, e = cmpbin.WriteInt(tmpBuf, -1)
panicIf(e)
panicIf(s.Property(tmpBuf, t))
case PropertySlice:
_, e = cmpbin.WriteInt(tmpBuf, int64(len(t)))
panicIf(e)
for _, p := range t {
panicIf(s.Property(tmpBuf, p))
}
default:
panicIf(fmt.Errorf("unknown PropertyData type %T", t))
}
rows = append(rows, tmpBuf.String())
}
if key != nil {
writeProp("$key", MkPropertyNI(key))
}
for name, pdata := range pm {
if !isMetaKey(name) {
writeProp(name, pdata)
}
}
if WritePropertyMapDeterministic {
rows.Sort()
}
_, e := cmpbin.WriteUint(buf, uint64(len(rows)))
panicIf(e)
for _, r := range rows {
_, e := buf.WriteString(r)
panicIf(e)
}
return
}
// IndexColumn writes an IndexColumn to the buffer.
func (s Serializer) IndexColumn(buf cmpbin.WriteableBytesBuffer, c IndexColumn) (err error) {
defer recoverTo(&err)
if !c.Descending {
panicIf(buf.WriteByte(0))
} else {
panicIf(buf.WriteByte(1))
}
_, err = cmpbin.WriteString(buf, c.Property)
return
}
// IndexDefinition writes an IndexDefinition to the buffer.
func (s Serializer) IndexDefinition(buf cmpbin.WriteableBytesBuffer, i IndexDefinition) (err error) {
defer recoverTo(&err)
_, err = cmpbin.WriteString(buf, i.Kind)
panicIf(err)
if !i.Ancestor {
panicIf(buf.WriteByte(0))
} else {
panicIf(buf.WriteByte(1))
}
for _, sb := range i.SortBy {
panicIf(buf.WriteByte(1))
panicIf(s.IndexColumn(buf, sb))
}
return buf.WriteByte(0)
}
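
// exampleIndexDefinition is an illustrative sketch (not part of the original
// API) of serializing a composite index definition: the kind, an ancestor
// flag, then a 1-prefixed list of sort columns terminated by a 0 byte. The
// kind and property names are arbitrary examples.
func exampleIndexDefinition(buf cmpbin.WriteableBytesBuffer) error {
	return Serialize.IndexDefinition(buf, IndexDefinition{
		Kind:     "Comment",
		Ancestor: true,
		SortBy: []IndexColumn{
			{Property: "author"},
			{Property: "created", Descending: true},
		},
	})
}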
// IndexedPropertySlice is a set of properties serialized to their comparable
// index representations via Serializer.IndexedProperties(...).
//
// Values are in some arbitrary order. If you need them sorted, call sort.Sort
// explicitly.
type IndexedPropertySlice [][]byte
func (s IndexedPropertySlice) Len() int { return len(s) }
func (s IndexedPropertySlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s IndexedPropertySlice) Less(i, j int) bool { return bytes.Compare(s[i], s[j]) < 0 }
// IndexedProperties maps from a property name to a set of its indexed values.
//
// It includes the special values '__key__' and '__ancestor__'; '__ancestor__'
// contains entries for the key and each of its ancestors.
//
// Map values are in some arbitrary order. If you need them sorted, call Sort
// explicitly.
type IndexedProperties map[string]IndexedPropertySlice
// Sort sorts all values (useful in tests).
func (sip IndexedProperties) Sort() {
for _, v := range sip {
sort.Sort(v)
}
}
// indexedPropsBuilder is used to construct IndexedProperties.
type indexedPropsBuilder map[string]stringset.Set
// add adds an entry to the set under the given key.
func (b indexedPropsBuilder) add(key string, val []byte) {
if vals := b[key]; vals != nil {
vals.Add(string(val))
} else {
b[key] = stringset.Set{string(val): struct{}{}}
}
}
// visit recursively traverses a property map, adding all indexed properties.
func (b indexedPropsBuilder) visit(propNamePfx string, pm PropertyMap) {
for k := range pm {
if isMetaKey(k) {
continue
}
for _, v := range pm.Slice(k) {
if v.IndexSetting() == NoIndex {
continue
}
switch {
case v.Type() == PTPropertyMap:
// Nested property maps are indexed recursively per-field, together with
// a special `__key__` property representing the embedded key, as long
// as it is a complete key.
nested := v.Value().(PropertyMap)
if key, _ := (KeyContext{}).NewKeyFromMeta(nested); key != nil && !key.IsIncomplete() {
b.add(propNamePfx+k+".__key__", Serialize.ToBytes(MkProperty(key)))
}
b.visit(propNamePfx+k+".", nested)
case v.Type().Comparable():
// Regular indexed properties that can be converted to byte blobs.
b.add(propNamePfx+k, Serialize.ToBytes(v))
default:
panic(fmt.Sprintf("uncomparable type %q being indexed as %q", v.Type(), propNamePfx+k))
}
}
}
}
// collect converts all added properties into the final IndexedProperties map.
func (b indexedPropsBuilder) collect() IndexedProperties {
res := make(IndexedProperties, len(b))
for k, v := range b {
blobs := make([][]byte, 0, v.Len())
for str := range v {
blobs = append(blobs, []byte(str))
}
res[k] = blobs
}
return res
}
// IndexedProperties turns a regular PropertyMap into an IndexedProperties.
//
// Essentially all the []Property's become IndexedPropertySlice, using cmpbin
// and Serializer's encodings.
//
// Keys are serialized without their context.
func (s Serializer) IndexedProperties(k *Key, pm PropertyMap) IndexedProperties {
builder := make(indexedPropsBuilder, len(pm)+2)
if k != nil {
builder.add("__key__", Serialize.ToBytes(MkProperty(k)))
for k != nil {
builder.add("__ancestor__", Serialize.ToBytes(MkProperty(k)))
k = k.Parent()
}
}
builder.visit("", pm)
return builder.collect()
}
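
// exampleIndexedProperties is an illustrative sketch (not part of the
// original API): the result maps every indexed field name to its serialized
// values, plus "__key__" (the key itself) and "__ancestor__" (the key and
// each of its ancestors). Sorting is only needed where a stable order
// matters, e.g. in tests.
func exampleIndexedProperties(k *Key, pm PropertyMap) IndexedProperties {
	props := Serialize.IndexedProperties(k, pm)
	props.Sort()
	return props
}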
// IndexedPropertiesForIndicies is like IndexedProperties, but it returns only
// the properties mentioned in the given indices, as well as the special
// '__key__' and '__ancestor__' properties.
//
// It is just an optimization to avoid serializing indices that will never be
// checked.
func (s Serializer) IndexedPropertiesForIndicies(k *Key, pm PropertyMap, idx []IndexColumn) (ret IndexedProperties) {
// TODO(vadimsh): Really make it an optimization. This function is only used
// in txnbuf, which is currently essentially dead code.
res := make(IndexedProperties, len(idx)+2)
sip := s.IndexedProperties(k, pm)
move := func(key string) {
if v := sip[key]; len(v) > 0 {
res[key] = v
}
}
move("__key__")
move("__ancestor__")
for _, col := range idx {
move(col.Property)
}
return res
}
// ToBytesErr serializes i to a byte slice, if it's one of the types supported
// by this library; otherwise it returns an error.
func (s Serializer) ToBytesErr(i any) (ret []byte, err error) {
buf := bytes.Buffer{}
switch t := i.(type) {
case IndexColumn:
err = s.IndexColumn(&buf, t)
case IndexDefinition:
err = s.IndexDefinition(&buf, t)
case KeyTok:
err = s.KeyTok(&buf, t)
case Property:
err = s.IndexedProperty(&buf, t)
case PropertyMap:
err = s.PropertyMap(&buf, t)
default:
// Do the same as Property(...), except do not write the type tag.
prop := MkProperty(i)
if prop.Type().Comparable() {
_, v := prop.IndexTypeAndValue()
err = s.rawPropValue(&buf, v)
} else {
err = s.rawPropValue(&buf, prop.Value())
}
}
if err == nil {
ret = buf.Bytes()
}
return
}
// ToBytes serializes i to a byte slice, if it's one of the types supported
// by this library. If an error is encountered (e.g. `i` is not a supported
// type), this method panics.
func (s Serializer) ToBytes(i any) []byte {
ret, err := s.ToBytesErr(i)
if err != nil {
panic(err)
}
return ret
}
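
// exampleCompareProperties is an illustrative sketch (not part of the
// original API): because ToBytes produces cmpbin-style encodings, comparing
// the raw byte slices of two comparable, indexed properties reproduces their
// relative ordering in a datastore index. ToBytes (and therefore this helper)
// panics on uncomparable property types.
func exampleCompareProperties(a, b Property) int {
	return bytes.Compare(Serialize.ToBytes(a), Serialize.ToBytes(b))
}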
type parseError error
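// panicIf panics with a parseError wrapping err if err is non-nil. It is
// paired with recoverTo to short-circuit serialization on the first write
// error.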
func panicIf(err error) {
if err != nil {
panic(parseError(err))
}
}
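// recoverTo recovers a parseError raised by panicIf and stores it in *err.
// Panics from any other source are not swallowed: the type assertion itself
// panics for non-parseError values.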
func recoverTo(err *error) {
if r := recover(); r != nil {
if rerr := r.(parseError); rerr != nil {
*err = error(rerr)
}
}
}