blob: 10f414689a624fa3ad0777bb2de20d29dabbe990 [file] [log] [blame]
/*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// This package includes multiple probabalistic data structures needed for
// admission/eviction metadata. Most are Counting Bloom Filter variations, but
// a caching-specific feature that is also required is a "freshness" mechanism,
// which basically serves as a "lifetime" process. This freshness mechanism
// was described in the original TinyLFU paper [1], but other mechanisms may
// be better suited for certain data distributions.
//
// [1]: https://arxiv.org/abs/1512.00727
package ristretto
import (
"fmt"
"math/rand"
"time"
)
// cmSketch is a Count-Min sketch implementation with 4-bit counters, heavily
// based on Damian Gryski's CM4 [1].
//
// [1]: https://github.com/dgryski/go-tinylfu/blob/master/cm4.go
type cmSketch struct {
rows [cmDepth]cmRow
seed [cmDepth]uint64
mask uint64
}
const (
// cmDepth is the number of counter copies to store (think of it as rows).
cmDepth = 4
)
func newCmSketch(numCounters int64) *cmSketch {
if numCounters == 0 {
panic("cmSketch: bad numCounters")
}
// Get the next power of 2 for better cache performance.
numCounters = next2Power(numCounters)
sketch := &cmSketch{mask: uint64(numCounters - 1)}
// Initialize rows of counters and seeds.
source := rand.New(rand.NewSource(time.Now().UnixNano()))
for i := 0; i < cmDepth; i++ {
sketch.seed[i] = source.Uint64()
sketch.rows[i] = newCmRow(numCounters)
}
return sketch
}
// Increment increments the count(ers) for the specified key.
func (s *cmSketch) Increment(hashed uint64) {
for i := range s.rows {
s.rows[i].increment((hashed ^ s.seed[i]) & s.mask)
}
}
// Estimate returns the value of the specified key.
func (s *cmSketch) Estimate(hashed uint64) int64 {
min := byte(255)
for i := range s.rows {
val := s.rows[i].get((hashed ^ s.seed[i]) & s.mask)
if val < min {
min = val
}
}
return int64(min)
}
// Reset halves all counter values.
func (s *cmSketch) Reset() {
for _, r := range s.rows {
r.reset()
}
}
// Clear zeroes all counters.
func (s *cmSketch) Clear() {
for _, r := range s.rows {
r.clear()
}
}
// cmRow is a row of bytes, with each byte holding two counters.
type cmRow []byte
func newCmRow(numCounters int64) cmRow {
return make(cmRow, numCounters/2)
}
func (r cmRow) get(n uint64) byte {
return byte(r[n/2]>>((n&1)*4)) & 0x0f
}
func (r cmRow) increment(n uint64) {
// Index of the counter.
i := n / 2
// Shift distance (even 0, odd 4).
s := (n & 1) * 4
// Counter value.
v := (r[i] >> s) & 0x0f
// Only increment if not max value (overflow wrap is bad for LFU).
if v < 15 {
r[i] += 1 << s
}
}
func (r cmRow) reset() {
// Halve each counter.
for i := range r {
r[i] = (r[i] >> 1) & 0x77
}
}
func (r cmRow) clear() {
// Zero each counter.
for i := range r {
r[i] = 0
}
}
func (r cmRow) string() string {
s := ""
for i := uint64(0); i < uint64(len(r)*2); i++ {
s += fmt.Sprintf("%02d ", (r[(i/2)]>>((i&1)*4))&0x0f)
}
s = s[:len(s)-1]
return s
}
// next2Power rounds x up to the next power of 2, if it's not already one.
func next2Power(x int64) int64 {
x--
x |= x >> 1
x |= x >> 2
x |= x >> 4
x |= x >> 8
x |= x >> 16
x |= x >> 32
x++
return x
}