| // Copyright 2015 The LUCI Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package memory |
| |
| import ( |
| "bytes" |
| "fmt" |
| "sort" |
| "strings" |
| |
| "go.chromium.org/luci/common/data/cmpbin" |
| "go.chromium.org/luci/common/data/stringset" |
| |
| ds "go.chromium.org/luci/gae/service/datastore" |
| ) |
| |
| // ErrMissingIndex is returned when the current indexes are not sufficient |
| // for the current query. |
| type ErrMissingIndex struct { |
| ns string |
| Missing *ds.IndexDefinition |
| } |
| |
| func (e *ErrMissingIndex) Error() string { |
| yaml, err := e.Missing.YAMLString() |
| if err != nil { |
| panic(err) |
| } |
| return fmt.Sprintf( |
| "Insufficient indexes. Consider adding:\n%s", yaml) |
| } |
| |
| // reducedQuery contains only the pieces of the query necessary to iterate for |
| // results. |
| // |
| // deduplication is applied externally |
| // projection / keysonly / entity retrieval is done externally |
| type reducedQuery struct { |
| kc ds.KeyContext |
| kind string |
| |
| // eqFilters indicate the set of all prefix constraints which need to be |
| // fulfilled in the composite query. All of these will translate into prefix |
| // bytes for SOME index. |
| eqFilters map[string]stringset.Set |
| |
| // suffixFormat is the PRECISE listing of the suffix columns that ALL indexes |
| // in the multi query will have. |
| // |
| // suffixFormat ALWAYS includes the inequality filter (if any) as the 0th |
| // element |
| // suffixFormat ALWAYS includes any additional projections (in ascending |
| // order) after all user defined sort orders |
| // suffixFormat ALWAYS has __key__ as the last column |
| suffixFormat []ds.IndexColumn |
| |
| // limits of the inequality and/or full sort order. This is ONLY a suffix, |
| // and it will be appended to the prefix during iteration. |
| start []byte |
| end []byte |
| |
| // metadata describing the total number of columns that this query requires to |
| // execute perfectly. |
| numCols int |
| } |
| |
| type indexDefinitionSortable struct { |
| // eqFilts is the list of ACTUAL prefix columns. Note that it may contain |
| // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becuase |
| // (tag=1, tag=2) is a perfectly valid query). |
| eqFilts []ds.IndexColumn |
| coll memCollection |
| } |
| |
| func (i *indexDefinitionSortable) hasAncestor() bool { |
| return len(i.eqFilts) > 0 && i.eqFilts[0].Property == "__ancestor__" |
| } |
| |
| func (i *indexDefinitionSortable) numEqHits(c *constraints) int { |
| ret := 0 |
| for _, filt := range i.eqFilts { |
| if _, ok := c.constraints[filt.Property]; ok { |
| ret++ |
| } |
| } |
| return ret |
| } |
| |
| type indexDefinitionSortableSlice []indexDefinitionSortable |
| |
| func (idxs indexDefinitionSortableSlice) Len() int { return len(idxs) } |
| func (idxs indexDefinitionSortableSlice) Swap(i, j int) { idxs[i], idxs[j] = idxs[j], idxs[i] } |
| func (idxs indexDefinitionSortableSlice) Less(i, j int) bool { |
| a, b := idxs[i], idxs[j] |
| if a.coll == nil && b.coll != nil { |
| return true |
| } else if a.coll != nil && b.coll == nil { |
| return false |
| } |
| |
| cmp := len(a.eqFilts) - len(b.eqFilts) |
| if cmp < 0 { |
| return true |
| } else if cmp > 0 { |
| return false |
| } |
| for k, col := range a.eqFilts { |
| ocol := b.eqFilts[k] |
| if !col.Descending && ocol.Descending { |
| return true |
| } else if col.Descending && !ocol.Descending { |
| return false |
| } |
| if col.Property < ocol.Property { |
| return true |
| } else if col.Property > ocol.Property { |
| return false |
| } |
| } |
| return false |
| } |
| |
| // maybeAddDefinition possibly adds a new indexDefinitionSortable to this slice. |
| // It's only added if it could be useful in servicing q, otherwise this function |
| // is a noop. |
| // |
| // This returns true iff the proposed index is OK and depletes missingTerms to |
| // empty. |
| // |
| // If the proposed index is PERFECT (e.g. contains enough columns to cover all |
| // equality filters, and also has the correct suffix), idxs will be replaced |
| // with JUST that index, and this will return true. |
| func (idxs *indexDefinitionSortableSlice) maybeAddDefinition(q *reducedQuery, s memStore, missingTerms stringset.Set, id *ds.IndexDefinition) bool { |
| // Kindless queries are handled elsewhere. |
| if id.Kind != q.kind { |
| impossible( |
| fmt.Errorf("maybeAddDefinition given index with wrong kind %q v %q", id.Kind, q.kind)) |
| } |
| |
| // If we're an ancestor query, and the index is compound, but doesn't include |
| // an Ancestor field, it doesn't work. Builtin indexes can be used for |
| // ancestor queries (and have !Ancestor), assuming that it's only equality |
| // filters (plus inequality on __key__), or a single inequality. |
| if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() { |
| impossible( |
| fmt.Errorf("maybeAddDefinition given compound index with wrong ancestor info: %s %#v", id, q)) |
| } |
| |
| // add __ancestor__ if necessary |
| sortBy := id.GetFullSortOrder() |
| |
| // If the index has fewer fields than we need for the suffix, it can't |
| // possibly help. |
| if len(sortBy) < len(q.suffixFormat) { |
| return false |
| } |
| |
| numEqFilts := len(sortBy) - len(q.suffixFormat) |
| // make sure the orders are precisely the same |
| for i, sb := range sortBy[numEqFilts:] { |
| if q.suffixFormat[i] != sb { |
| return false |
| } |
| } |
| |
| if id.Builtin() && numEqFilts == 0 { |
| if len(q.eqFilters) > 1 || (len(q.eqFilters) == 1 && q.eqFilters["__ancestor__"] == nil) { |
| return false |
| } |
| if len(sortBy) > 1 && q.eqFilters["__ancestor__"] != nil { |
| return false |
| } |
| } |
| |
| // Make sure the equalities section doesn't contain any properties we don't |
| // want in our query. |
| // |
| // numByProp && totalEqFilts will be used to see if this is a perfect match |
| // later. |
| numByProp := make(map[string]int, len(q.eqFilters)) |
| totalEqFilts := 0 |
| |
| eqFilts := sortBy[:numEqFilts] |
| for _, p := range eqFilts { |
| if _, ok := q.eqFilters[p.Property]; !ok { |
| return false |
| } |
| numByProp[p.Property]++ |
| totalEqFilts++ |
| } |
| |
| // ok, we can actually use this |
| |
| // Grab the collection for convenience later. We don't want to invalidate this |
| // index's potential just because the collection doesn't exist. If it's |
| // a builtin and it doesn't exist, it still needs to be one of the 'possible' |
| // indexes... it just means that the user's query will end up with no results. |
| coll := s.GetCollection( |
| fmt.Sprintf("idx:%s:%s", q.kc.Namespace, ds.Serialize.ToBytes(*id.PrepForIdxTable()))) |
| |
| // First, see if it's a perfect match. If it is, then our search is over. |
| // |
| // A perfect match contains ALL the equality filter columns (or more, since |
| // we can use residuals to fill in the extras). |
| for _, sb := range eqFilts { |
| missingTerms.Del(sb.Property) |
| } |
| |
| perfect := false |
| if len(sortBy) == q.numCols { |
| perfect = true |
| for k, num := range numByProp { |
| if num < q.eqFilters[k].Len() { |
| perfect = false |
| break |
| } |
| } |
| } |
| toAdd := indexDefinitionSortable{coll: coll, eqFilts: eqFilts} |
| if perfect { |
| *idxs = indexDefinitionSortableSlice{toAdd} |
| } else { |
| *idxs = append(*idxs, toAdd) |
| } |
| return missingTerms.Len() == 0 |
| } |
| |
| // getRelevantIndexes retrieves the relevant indexes which could be used to |
| // service q. It returns nil if it's not possible to service q with the current |
| // indexes. |
| func getRelevantIndexes(q *reducedQuery, s memStore) (indexDefinitionSortableSlice, error) { |
| missingTerms := stringset.New(len(q.eqFilters)) |
| for k := range q.eqFilters { |
| if k == "__ancestor__" { |
| // ancestor is not a prefix which can be satisfied by a single index. It |
| // must be satisfied by ALL indexes (and has special logic for this in |
| // the addDefinition logic) |
| continue |
| } |
| missingTerms.Add(k) |
| } |
| idxs := indexDefinitionSortableSlice{} |
| |
| // First we add builtins |
| // add |
| // idx:KIND |
| if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ |
| Kind: q.kind, |
| }) { |
| return idxs, nil |
| } |
| |
| // add |
| // idx:KIND:prop |
| // idx:KIND:-prop |
| props := stringset.New(len(q.eqFilters) + len(q.suffixFormat)) |
| for prop := range q.eqFilters { |
| props.Add(prop) |
| } |
| for _, col := range q.suffixFormat[:len(q.suffixFormat)-1] { |
| props.Add(col.Property) |
| } |
| for _, prop := range props.ToSlice() { |
| if !isSpecialProp(prop) && (strings.HasPrefix(prop, "__") && strings.HasSuffix(prop, "__")) { |
| continue |
| } |
| if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ |
| Kind: q.kind, |
| SortBy: []ds.IndexColumn{ |
| {Property: prop}, |
| }, |
| }) { |
| return idxs, nil |
| } |
| if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ |
| Kind: q.kind, |
| SortBy: []ds.IndexColumn{ |
| {Property: prop, Descending: true}, |
| }, |
| }) { |
| return idxs, nil |
| } |
| } |
| |
| // Try adding all compound indexes whose suffix matches. |
| suffix := &ds.IndexDefinition{ |
| Kind: q.kind, |
| Ancestor: q.eqFilters["__ancestor__"] != nil, |
| SortBy: q.suffixFormat, |
| } |
| walkCompIdxs(s, suffix, func(def *ds.IndexDefinition) bool { |
| // keep walking until we find a perfect index. |
| return !idxs.maybeAddDefinition(q, s, missingTerms, def) |
| }) |
| |
| // this query is impossible to fulfill with the current indexes. Not all the |
| // terms (equality + projection) are satisfied. |
| if missingTerms.Len() > 0 || len(idxs) == 0 { |
| remains := &ds.IndexDefinition{ |
| Kind: q.kind, |
| Ancestor: q.eqFilters["__ancestor__"] != nil, |
| } |
| terms := missingTerms.ToSlice() |
| if serializationDeterministic { |
| sort.Strings(terms) |
| } |
| for _, term := range terms { |
| remains.SortBy = append(remains.SortBy, ds.IndexColumn{Property: term}) |
| } |
| remains.SortBy = append(remains.SortBy, q.suffixFormat...) |
| last := remains.SortBy[len(remains.SortBy)-1] |
| if !last.Descending { |
| // this removes the __key__ column, since it's implicit. |
| remains.SortBy = remains.SortBy[:len(remains.SortBy)-1] |
| } |
| if remains.Builtin() { |
| impossible( |
| fmt.Errorf("recommended missing index would be a builtin: %s", remains)) |
| } |
| return nil, &ErrMissingIndex{q.kc.Namespace, remains} |
| } |
| |
| return idxs, nil |
| } |
| |
| // generate generates a single iterDefinition for the given index. |
| func generate(q *reducedQuery, idx *indexDefinitionSortable, c *constraints) *iterDefinition { |
| def := &iterDefinition{ |
| c: idx.coll, |
| start: q.start, |
| end: q.end, |
| } |
| toJoin := make([][]byte, len(idx.eqFilts)) |
| for _, sb := range idx.eqFilts { |
| val := c.peel(sb.Property) |
| if sb.Descending { |
| val = cmpbin.InvertBytes(val) |
| } |
| toJoin = append(toJoin, val) |
| } |
| def.prefix = bytes.Join(toJoin, nil) |
| def.prefixLen = len(def.prefix) |
| |
| if q.eqFilters["__ancestor__"] != nil && !idx.hasAncestor() { |
| // The query requires an ancestor, but the index doesn't explicitly have it |
| // as part of the prefix (otherwise it would have been the first eqFilt |
| // above). This happens when it's a builtin index, or if it's the primary |
| // index (for a kindless query), or if it's the Kind index (for a filterless |
| // query). |
| // |
| // builtin indexes are: |
| // Kind/__key__ |
| // Kind/Prop/__key__ |
| // Kind/Prop/-__key__ |
| if len(q.suffixFormat) > 2 || q.suffixFormat[len(q.suffixFormat)-1].Property != "__key__" { |
| // This should never happen. One of the previous validators would have |
| // selected a different index. But just in case. |
| impossible(fmt.Errorf("cannot supply an implicit ancestor for %#v", idx)) |
| } |
| |
| // get the only value out of __ancestor__ |
| anc, _ := q.eqFilters["__ancestor__"].Peek() |
| |
| // Intentionally do NOT update prefixLen. This allows multiIterator to |
| // correctly include the entire key in the shared iterator suffix, instead |
| // of just the remainder. |
| |
| // chop the terminal null byte off the q.ancestor key... we can accept |
| // anything which is a descendant or an exact match. Removing the last byte |
| // from the key (the terminating null) allows this trick to work. Otherwise |
| // it would be a closed range of EXACTLY this key. |
| chopped := []byte(anc[:len(anc)-1]) |
| if q.suffixFormat[0].Descending { |
| chopped = cmpbin.InvertBytes(chopped) |
| } |
| def.prefix = cmpbin.ConcatBytes(def.prefix, chopped) |
| |
| // Update start and end, since we know that if they contain anything, they |
| // contain values for the __key__ field. This is necessary because bytes |
| // are shifting from the suffix to the prefix, and start/end should only |
| // contain suffix (variable) bytes. |
| if def.start != nil { |
| if !bytes.HasPrefix(def.start, chopped) { |
| // again, shouldn't happen, but if it does, we want to know about it. |
| impossible(fmt.Errorf( |
| "start suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v", |
| def.start, chopped)) |
| } |
| def.start = def.start[len(chopped):] |
| } |
| if def.end != nil { |
| if !bytes.HasPrefix(def.end, chopped) { |
| impossible(fmt.Errorf( |
| "end suffix for implied ancestor doesn't start with ancestor! end:%v ancestor:%v", |
| def.end, chopped)) |
| } |
| def.end = def.end[len(chopped):] |
| } |
| } |
| |
| return def |
| } |
| |
| type constraints struct { |
| constraints map[string][][]byte |
| original map[string][][]byte |
| residualMapping map[string]int |
| } |
| |
| // peel picks a constraint value for the property. It then removes this value |
| // from constraints (possibly removing the entire row from constraints if it |
| // was the last value). If the value wasn't available in constraints, it picks |
| // the value from residuals. |
| func (c *constraints) peel(prop string) []byte { |
| ret := []byte(nil) |
| if vals, ok := c.constraints[prop]; ok { |
| ret = vals[0] |
| if len(vals) == 1 { |
| delete(c.constraints, prop) |
| } else { |
| c.constraints[prop] = vals[1:] |
| } |
| } else { |
| row := c.original[prop] |
| idx := c.residualMapping[prop] |
| c.residualMapping[prop]++ |
| ret = row[idx%len(row)] |
| } |
| return ret |
| } |
| |
| func (c *constraints) empty() bool { |
| return len(c.constraints) == 0 |
| } |
| |
| // calculateConstraints produces a mapping of all equality filters to the values |
| // that they're constrained to. It also calculates residuals, which are an |
| // arbitrary value for filling index prefixes which have more equality fields |
| // than are necessary. The value doesn't matter, as long as its an equality |
| // constraint in the original query. |
| func calculateConstraints(q *reducedQuery) *constraints { |
| ret := &constraints{ |
| original: make(map[string][][]byte, len(q.eqFilters)), |
| constraints: make(map[string][][]byte, len(q.eqFilters)), |
| residualMapping: make(map[string]int), |
| } |
| for prop, vals := range q.eqFilters { |
| bvals := make([][]byte, 0, vals.Len()) |
| vals.Iter(func(val string) bool { |
| bvals = append(bvals, []byte(val)) |
| return true |
| }) |
| ret.original[prop] = bvals |
| if prop == "__ancestor__" { |
| // exclude __ancestor__ from the constraints. |
| // |
| // This is because it's handled specially during index proposal and |
| // generation. Ancestor is used by ALL indexes, and so its residual value |
| // in ret.original above will be sufficient. |
| continue |
| } |
| ret.constraints[prop] = bvals |
| } |
| return ret |
| } |
| |
| // getIndexes returns a set of iterator definitions. Iterating over these |
| // will result in matching suffixes. |
| func getIndexes(q *reducedQuery, s memStore) ([]*iterDefinition, error) { |
| relevantIdxs := indexDefinitionSortableSlice(nil) |
| if q.kind == "" { |
| if coll := s.GetCollection("ents:" + q.kc.Namespace); coll != nil { |
| relevantIdxs = indexDefinitionSortableSlice{{coll: coll}} |
| } |
| } else { |
| err := error(nil) |
| relevantIdxs, err = getRelevantIndexes(q, s) |
| if err != nil { |
| return nil, err |
| } |
| } |
| if len(relevantIdxs) == 0 { |
| return nil, ds.ErrNullQuery |
| } |
| |
| // This sorts it so that relevantIdxs goes less filters -> more filters. We |
| // traverse this list backwards, however, so we traverse it in more filters -> |
| // less filters order. |
| sort.Sort(relevantIdxs) |
| |
| constraints := calculateConstraints(q) |
| |
| ret := []*iterDefinition{} |
| for !constraints.empty() || len(ret) == 0 { |
| bestIdx := (*indexDefinitionSortable)(nil) |
| if len(ret) == 0 { |
| // if ret is empty, take the biggest relevantIdx. It's guaranteed to have |
| // the greatest number of equality filters of any index in the list, and |
| // we know that every equality filter will be pulled from constraints and |
| // not residual. |
| // |
| // This also takes care of the case when the query has no equality filters, |
| // in which case relevantIdxs will actually only contain one index anyway |
| // :) |
| bestIdx = &relevantIdxs[len(relevantIdxs)-1] |
| if bestIdx.coll == nil { |
| return nil, ds.ErrNullQuery |
| } |
| } else { |
| // If ret's not empty, then we need to find the best index we can. The |
| // best index will be the one with the most matching equality columns. |
| // Since relevantIdxs is sorted primarially by the number of equality |
| // columns, we walk down the list until the number of possible columns is |
| // worse than our best-so-far. |
| // |
| // Traversing the list backwards goes from more filters -> less filters, |
| // but also allows us to remove items from the list as we iterate over it. |
| bestNumEqHits := 0 |
| for i := len(relevantIdxs) - 1; i >= 0; i-- { |
| idx := &relevantIdxs[i] |
| if len(idx.eqFilts) < bestNumEqHits { |
| // if the number of filters drops below our best hit, it's never going |
| // to get better than that. This index might be helpful on a later |
| // loop though, so don't remove it. |
| break |
| } |
| numHits := 0 |
| if idx.coll != nil { |
| numHits = idx.numEqHits(constraints) |
| } |
| if numHits > bestNumEqHits { |
| bestNumEqHits = numHits |
| bestIdx = idx |
| } else if numHits == 0 { |
| // This index will never become useful again, so remove it. |
| relevantIdxs = append(relevantIdxs[:i], relevantIdxs[i+1:]...) |
| } |
| } |
| } |
| if bestIdx == nil { |
| // something is really wrong here... if relevantIdxs is !nil, then we |
| // should always be able to make progress in this loop. |
| impossible(fmt.Errorf("deadlock: cannot fulfil query?")) |
| } |
| ret = append(ret, generate(q, bestIdx, constraints)) |
| } |
| |
| return ret, nil |
| } |