blob: 856f4c19bf0c110f29999217dfff28464245c12c [file] [log] [blame]
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package memory
import (
"bytes"
"encoding/base64"
"errors"
"fmt"
ds "github.com/luci/gae/service/datastore"
"github.com/luci/gae/service/datastore/serialize"
"github.com/luci/luci-go/common/cmpbin"
"github.com/luci/luci-go/common/stringset"
)
// MaxQueryComponents was lifted from a hard-coded constant in dev_appserver.
// No idea if it's a real limit or just a convenience in the current dev
// appserver implementation.
const MaxQueryComponents = 100
// MaxIndexColumns is the maximum number of index columns we're willing to
// support.
const MaxIndexColumns = 64
// A queryCursor is:
// {#orders} ++ IndexColumn* ++ RawRowData
// IndexColumn will always contain __key__ as the last column, and so #orders
// must always be >= 1
type queryCursor []byte
func newCursor(s string) (ds.Cursor, error) {
d, err := base64.URLEncoding.DecodeString(s)
if err != nil {
return nil, fmt.Errorf("Failed to Base64-decode cursor: %s", err)
}
c := queryCursor(d)
if _, _, err := c.decode(); err != nil {
return nil, err
}
return c, nil
}
func (q queryCursor) String() string { return base64.URLEncoding.EncodeToString([]byte(q)) }
// decode returns the encoded IndexColumns, the raw row (cursor) data, or an
// error.
func (q queryCursor) decode() ([]ds.IndexColumn, []byte, error) {
buf := bytes.NewBuffer([]byte(q))
count, _, err := cmpbin.ReadUint(buf)
if err != nil {
return nil, nil, fmt.Errorf("invalid cursor: bad prefix number")
}
if count == 0 || count > MaxIndexColumns {
return nil, nil, fmt.Errorf("invalid cursor: bad column count %d", count)
}
if count == 0 {
return nil, nil, fmt.Errorf("invalid cursor: zero prefix number")
}
cols := make([]ds.IndexColumn, count)
for i := range cols {
if cols[i], err = serialize.ReadIndexColumn(buf); err != nil {
return nil, nil, fmt.Errorf("invalid cursor: unable to decode IndexColumn %d: %s", i, err)
}
}
if cols[len(cols)-1].Property != "__key__" {
return nil, nil, fmt.Errorf("invalid cursor: last column was not __key__: %v", cols[len(cols)-1])
}
return cols, buf.Bytes(), nil
}
func sortOrdersEqual(as, bs []ds.IndexColumn) bool {
if len(as) != len(bs) {
return false
}
for i, a := range as {
if a != bs[i] {
return false
}
}
return true
}
func numComponents(fq *ds.FinalizedQuery) int {
numComponents := len(fq.Orders())
if p, _, _ := fq.IneqFilterLow(); p != "" {
numComponents++
}
if p, _, _ := fq.IneqFilterHigh(); p != "" {
numComponents++
}
for _, v := range fq.EqFilters() {
numComponents += v.Len()
}
return numComponents
}
// GetBinaryBounds gets the binary encoding of the upper and lower bounds of
// the inequality filter on fq, if any is defined. If a bound does not exist,
// it is nil.
//
// NOTE: if fq specifies a descending sort order for the inequality, the bounds
// will be inverted, incremented, and fliped.
func GetBinaryBounds(fq *ds.FinalizedQuery) (lower, upper []byte) {
// Pick up the start/end range from the inequalities, if any.
//
// start and end in the reducedQuery are normalized so that `start >=
// X < end`. Because of that, we need to tweak the inequality filters
// contained in the query if they use the > or <= operators.
if ineqProp := fq.IneqFilterProp(); ineqProp != "" {
_, startOp, startV := fq.IneqFilterLow()
if startOp != "" {
lower = serialize.ToBytes(startV)
if startOp == ">" {
lower = increment(lower)
}
}
_, endOp, endV := fq.IneqFilterHigh()
if endOp != "" {
upper = serialize.ToBytes(endV)
if endOp == "<=" {
upper = increment(upper)
}
}
// The inequality is specified in natural (ascending) order in the query's
// Filter syntax, but the order information may indicate to use a descending
// index column for it. If that's the case, then we must invert, swap and
// increment the inequality endpoints.
//
// Invert so that the desired numbers are represented correctly in the index.
// Swap so that our iterators still go from >= start to < end.
// Increment so that >= and < get correctly bounded (since the iterator is
// still using natrual bytes ordering)
if fq.Orders()[0].Descending {
hi, lo := []byte(nil), []byte(nil)
if len(lower) > 0 {
lo = increment(serialize.Invert(lower))
}
if len(upper) > 0 {
hi = increment(serialize.Invert(upper))
}
upper, lower = lo, hi
}
}
return
}
func reduce(fq *ds.FinalizedQuery, aid, ns string, isTxn bool) (*reducedQuery, error) {
if err := fq.Valid(aid, ns); err != nil {
return nil, err
}
if isTxn && fq.Ancestor() == nil {
return nil, fmt.Errorf("queries within a transaction must include an Ancestor filter")
}
if num := numComponents(fq); num > MaxQueryComponents {
return nil, fmt.Errorf(
"gae/memory: query is too large. may not have more than "+
"%d filters + sort orders + ancestor total: had %d",
MaxQueryComponents, num)
}
ret := &reducedQuery{
aid: aid,
ns: ns,
kind: fq.Kind(),
suffixFormat: fq.Orders(),
}
eqFilts := fq.EqFilters()
ret.eqFilters = make(map[string]stringset.Set, len(eqFilts))
for prop, vals := range eqFilts {
sVals := stringset.New(len(vals))
for _, v := range vals {
sVals.Add(string(serialize.ToBytes(v)))
}
ret.eqFilters[prop] = sVals
}
startD, endD := GetBinaryBounds(fq)
// Now we check the start and end cursors.
//
// Cursors are composed of a list of IndexColumns at the beginning, followed
// by the raw bytes to use for the suffix. The cursor is only valid if all of
// its IndexColumns match our proposed suffixFormat, as calculated above.
//
// Cursors are mutually exclusive with the start/end we picked up from the
// inequality. In a well formed query, they indicate a subset of results
// bounded by the inequality. Technically if the start cursor is not >= the
// low bound, or the end cursor is < the high bound, it's an error, but for
// simplicity we just cap to the narrowest intersection of the inequality and
// cursors.
ret.start = startD
ret.end = endD
if start, end := fq.Bounds(); start != nil || end != nil {
if start != nil {
if c, ok := start.(queryCursor); ok {
startCols, startD, err := c.decode()
if err != nil {
return nil, err
}
if !sortOrdersEqual(startCols, ret.suffixFormat) {
return nil, errors.New("gae/memory: start cursor is invalid for this query")
}
if ret.start == nil || bytes.Compare(ret.start, startD) < 0 {
ret.start = startD
}
} else {
return nil, errors.New("gae/memory: bad cursor type")
}
}
if end != nil {
if c, ok := end.(queryCursor); ok {
endCols, endD, err := c.decode()
if err != nil {
return nil, err
}
if !sortOrdersEqual(endCols, ret.suffixFormat) {
return nil, errors.New("gae/memory: end cursor is invalid for this query")
}
if ret.end == nil || bytes.Compare(endD, ret.end) < 0 {
ret.end = endD
}
} else {
return nil, errors.New("gae/memory: bad cursor type")
}
}
}
// Finally, verify that we could even /potentially/ do work. If we have
// overlapping range ends, then we don't have anything to do.
if ret.end != nil && bytes.Compare(ret.start, ret.end) >= 0 {
return nil, ds.ErrNullQuery
}
ret.numCols = len(ret.suffixFormat)
for prop, vals := range ret.eqFilters {
if len(ret.suffixFormat) == 1 && prop == "__ancestor__" {
continue
}
ret.numCols += vals.Len()
}
return ret, nil
}
func increment(bstr []byte) []byte {
ret, overflow := serialize.Increment(bstr)
if overflow {
// This byte string was ALL 0xFF's. The only safe incrementation to do here
// would be to add a new byte to the beginning of bstr with the value 0x01,
// and a byte to the beginning OF ALL OTHER []byte's which bstr may be
// compared with. This is obviously impossible to do here, so panic. If we
// hit this, then we would need to add a spare 0 byte before every index
// column.
//
// Another way to think about this is that we just accumulated a 'carry' bit,
// and the new value has overflowed this representation.
//
// Fortunately, the first byte of a serialized index column entry is a
// PropertyType byte, and the only valid values that we'll be incrementing
// are never equal to 0xFF, since they have the high bit set (so either they're
// 0x8*, or 0x7*, depending on if it's inverted).
impossible(fmt.Errorf("incrementing %v would require more sigfigs", bstr))
}
return ret
}