blob: 255fad076535d3b5c0b5ccc4d8706f5f0f9b9c1d [file] [log] [blame] [edit]
/*
* Copyright 2020 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package z
import (
"fmt"
"io/ioutil"
"math"
"math/rand"
"os"
"sort"
"testing"
"github.com/dgraph-io/ristretto/z/simd"
"github.com/dustin/go-humanize"
"github.com/stretchr/testify/require"
)
var tmp int
func setPageSize(sz int) {
pageSize = sz
maxKeys = (pageSize / 16) - 1
}
func TestTree(t *testing.T) {
bt := NewTree("", 1<<20)
N := uint64(256 * 256)
for i := uint64(1); i < N; i++ {
bt.Set(i, i)
}
for i := uint64(1); i < N; i++ {
require.Equal(t, i, bt.Get(i))
}
bt.DeleteBelow(100)
for i := uint64(1); i < 100; i++ {
require.Equal(t, uint64(0), bt.Get(i))
}
for i := uint64(100); i < N; i++ {
require.Equal(t, i, bt.Get(i))
}
}
func TestTreeBasic(t *testing.T) {
setAndGet := func() {
bt := NewTree("", 1<<20)
defer bt.Release()
N := uint64(1 << 20)
mp := make(map[uint64]uint64)
for i := uint64(1); i < N; i++ {
key := uint64(rand.Int63n(1<<60) + 1)
bt.Set(key, key)
mp[key] = key
}
for k, v := range mp {
require.Equal(t, v, bt.Get(k))
}
stats := bt.Stats()
t.Logf("final stats: %+v\n", stats)
}
setAndGet()
defer setPageSize(os.Getpagesize())
setPageSize(16 << 5)
setAndGet()
}
func TestTreeReset(t *testing.T) {
bt := NewTree("", 1<<20)
defer bt.Release()
N := 1 << 10
val := rand.Uint64()
for i := 0; i < N; i++ {
bt.Set(rand.Uint64(), val)
}
// Truncate it to small size that is less than pageSize.
bt.Reset(1 << 10)
stats := bt.Stats()
// Verify the tree stats.
require.Equal(t, 3, stats.NextPage)
require.Equal(t, 2, stats.NumNodes)
require.Equal(t, 1, stats.NumLeafKeys)
require.Equal(t, 1, stats.NumLeafKeys)
require.Equal(t, 2*pageSize, stats.Bytes)
expectedOcc := float64(2) * 100 / float64(stats.NumNodes*maxKeys)
require.InDelta(t, expectedOcc, stats.Occupancy, 0.01)
require.Zero(t, stats.FreePages)
// Check if we can reinsert the data.
mp := make(map[uint64]uint64)
for i := 0; i < N; i++ {
k := rand.Uint64()
mp[k] = val
bt.Set(k, val)
}
for k, v := range mp {
require.Equal(t, v, bt.Get(k))
}
}
func TestTreeCycle(t *testing.T) {
bt := NewTree("", 1<<20)
defer bt.Release()
val := uint64(0)
for i := 0; i < 16; i++ {
for j := 0; j < 1e6+i*1e4; j++ {
val += 1
bt.Set(rand.Uint64(), val)
}
before := bt.Stats()
bt.DeleteBelow(val - 1e4)
after := bt.Stats()
t.Logf("Cycle %d Done. Before: %+v -> After: %+v\n", i, before, after)
}
bt.DeleteBelow(val)
stats := bt.Stats()
t.Logf("stats: %+v\n", stats)
require.LessOrEqual(t, stats.Occupancy, 1.0)
require.GreaterOrEqual(t, stats.FreePages, int(float64(stats.NextPage)*0.95))
}
func TestOccupancyRatio(t *testing.T) {
// atmax 4 keys per node
setPageSize(16 * 5)
defer setPageSize(os.Getpagesize())
require.Equal(t, 4, maxKeys)
bt := NewTree("", 1<<20)
defer bt.Release()
expectedRatio := float64(1) * 100 / float64(maxKeys)
stats := bt.Stats()
require.InDelta(t, expectedRatio, stats.Occupancy, 0.01)
for i := uint64(1); i <= 3; i++ {
bt.Set(i, i)
}
// Tree structure will be:
// [2,Max,_,_]
// [1,2,_,_] [3,Max,_,_]
expectedRatio = float64(6) * 100 / float64(3*maxKeys)
stats = bt.Stats()
require.InDelta(t, expectedRatio, stats.Occupancy, 0.01)
bt.DeleteBelow(2)
// Tree structure will be:
// [2,Max,_]
// [2,_,_,_] [3,Max,_,_]
expectedRatio = float64(5) * 100 / float64(3*maxKeys)
stats = bt.Stats()
require.InDelta(t, expectedRatio, stats.Occupancy, 0.01)
}
func TestNode(t *testing.T) {
n := getNode(make([]byte, pageSize))
for i := uint64(1); i < 16; i *= 2 {
n.set(i, i)
}
n.print(0)
require.True(t, 0 == n.get(5))
n.set(5, 5)
n.print(0)
n.setBit(0)
require.False(t, n.isLeaf())
n.setBit(bitLeaf)
require.True(t, n.isLeaf())
}
func TestNodeBasic(t *testing.T) {
n := getNode(make([]byte, pageSize))
N := uint64(256)
mp := make(map[uint64]uint64)
for i := uint64(1); i < N; i++ {
key := uint64(rand.Int63n(1<<60) + 1)
n.set(key, key)
mp[key] = key
}
for k, v := range mp {
require.Equal(t, v, n.get(k))
}
}
func TestNode_MoveRight(t *testing.T) {
n := getNode(make([]byte, pageSize))
N := uint64(10)
for i := uint64(1); i < N; i++ {
n.set(i, i)
}
n.moveRight(5)
n.iterate(func(n node, i int) {
if i < 5 {
require.Equal(t, uint64(i+1), n.key(i))
require.Equal(t, uint64(i+1), n.val(i))
} else if i > 5 {
require.Equal(t, uint64(i), n.key(i))
require.Equal(t, uint64(i), n.val(i))
}
})
}
func TestNodeCompact(t *testing.T) {
n := getNode(make([]byte, pageSize))
n.setBit(bitLeaf)
N := uint64(128)
mp := make(map[uint64]uint64)
for i := uint64(1); i < N; i++ {
key := i
val := uint64(10)
if i%2 == 0 {
val = 20
mp[key] = 20
}
n.set(key, val)
}
require.Equal(t, int(N/2), n.compact(11))
for k, v := range mp {
require.Equal(t, v, n.get(k))
}
require.Equal(t, uint64(127), n.maxKey())
}
func BenchmarkWrite(b *testing.B) {
b.Run("map", func(b *testing.B) {
mp := make(map[uint64]uint64)
for n := 0; n < b.N; n++ {
k := rand.Uint64()
mp[k] = k
}
})
b.Run("btree", func(b *testing.B) {
bt := NewTree("", 1<<30)
defer bt.Release()
b.ResetTimer()
for n := 0; n < b.N; n++ {
k := rand.Uint64()
bt.Set(k, k)
}
})
}
// goos: linux
// goarch: amd64
// pkg: github.com/dgraph-io/ristretto/z
// BenchmarkRead/map-4 10845322 109 ns/op
// BenchmarkRead/btree-4 2744283 430 ns/op
// Cumulative for 10 runs.
// name time/op
// Read/map-4 105ns ± 1%
// Read/btree-4 422ns ± 1%
func BenchmarkRead(b *testing.B) {
N := 10 << 20
mp := make(map[uint64]uint64)
for i := 0; i < N; i++ {
k := uint64(rand.Intn(2*N)) + 1
mp[k] = k
}
b.Run("map", func(b *testing.B) {
for i := 0; i < b.N; i++ {
k := uint64(rand.Intn(2 * N))
v, ok := mp[k]
_, _ = v, ok
}
})
bt := NewTree("", 1<<30)
defer bt.Release()
for i := 0; i < N; i++ {
k := uint64(rand.Intn(2*N)) + 1
bt.Set(k, k)
}
stats := bt.Stats()
fmt.Printf("Num pages: %d Size: %s\n", stats.NumNodes,
humanize.IBytes(uint64(stats.Bytes)))
fmt.Println("Writes done.")
b.Run("btree", func(b *testing.B) {
for i := 0; i < b.N; i++ {
k := uint64(rand.Intn(2*N)) + 1
v := bt.Get(k)
_ = v
}
})
}
func BenchmarkSearch(b *testing.B) {
linear := func(n node, k uint64, N int) int {
for i := 0; i < N; i++ {
if ki := n.key(i); ki >= k {
return i
}
}
return N
}
binary := func(n node, k uint64, N int) int {
return sort.Search(N, func(i int) bool {
return n.key(i) >= k
})
}
unroll4 := func(n node, k uint64, N int) int {
if len(n[:2*N]) < 8 {
for i := 0; i < N; i++ {
if ki := n.key(i); ki >= k {
return i
}
}
return N
}
return int(simd.Search(n[:2*N], k))
}
jumpBy := []int{8, 16, 32, 64, 128, 196, 255}
for _, sz := range jumpBy {
f, err := ioutil.TempFile(".", "tree")
require.NoError(b, err)
mf, err := OpenMmapFileUsing(f, pageSize, true)
if err != NewFile {
require.NoError(b, err)
}
n := getNode(mf.Data)
for i := 1; i <= sz; i++ {
n.set(uint64(i), uint64(i))
}
b.Run(fmt.Sprintf("linear-%d", sz), func(b *testing.B) {
for i := 0; i < b.N; i++ {
tmp = linear(n, math.MaxUint64, sz)
}
})
b.Run(fmt.Sprintf("binary-%d", sz), func(b *testing.B) {
for i := 0; i < b.N; i++ {
tmp = binary(n, uint64(sz), sz)
}
})
b.Run(fmt.Sprintf("unrolled-asm-%d", sz), func(b *testing.B) {
for i := 0; i < b.N; i++ {
tmp = unroll4(n, math.MaxUint64, sz)
}
})
mf.Close(0)
os.Remove(f.Name())
}
}
// This benchmark when run on dgus-delta, performed marginally better with threshold=32.
// CustomSearch/sz-64_th-1-4 49.9ns ± 1% (fully binary)
// CustomSearch/sz-64_th-16-4 63.3ns ± 0%
// CustomSearch/sz-64_th-32-4 58.7ns ± 7%
// CustomSearch/sz-64_th-64-4 63.9ns ± 7% (fully linear)
// CustomSearch/sz-128_th-32-4 70.2ns ± 1%
// CustomSearch/sz-255_th-1-4 77.3ns ± 0% (fully binary)
// CustomSearch/sz-255_th-16-4 68.2ns ± 1%
// CustomSearch/sz-255_th-32-4 67.0ns ± 7%
// CustomSearch/sz-255_th-64-4 85.5ns ±19%
// CustomSearch/sz-255_th-256-4 129ns ± 6% (fully linear)
func BenchmarkCustomSearch(b *testing.B) {
mixed := func(n node, k uint64, N int, threshold int) int {
lo, hi := 0, N
// Reduce the search space using binary seach and then do linear search.
for hi-lo > threshold {
mid := (hi + lo) / 2
km := n.key(mid)
if k == km {
return mid
}
if k > km {
// key is greater than the key at mid, so move right.
lo = mid + 1
} else {
// else move left.
hi = mid
}
}
for i := lo; i <= hi; i++ {
if ki := n.key(i); ki >= k {
return i
}
}
return N
}
for _, sz := range []int{64, 128, 255} {
n := getNode(make([]byte, pageSize))
for i := 1; i <= sz; i++ {
n.set(uint64(i), uint64(i))
}
mk := sz + 1
for th := 1; th <= sz+1; th *= 2 {
b.Run(fmt.Sprintf("sz-%d th-%d", sz, th), func(b *testing.B) {
for i := 0; i < b.N; i++ {
k := uint64(rand.Intn(mk))
tmp = mixed(n, k, sz, th)
}
})
}
}
}