blob: d8c94bb7adec6842b0224c87d80e2040da480b38 [file] [log] [blame]
/*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"math"
)
// PrintHistogram builds the key size and value size histograms for db and
// prints both to standard output. Only keys that start with keyPrefix are
// taken into account. When db is nil a notice is printed and nothing else
// happens.
func (db *DB) PrintHistogram(keyPrefix []byte) {
	if db == nil {
		fmt.Println("\nCannot build histogram: DB is nil.")
		return
	}

	sizes := db.buildHistogram(keyPrefix)

	fmt.Print("Histogram of key sizes (in bytes)\n")
	sizes.keySizeHistogram.printHistogram()

	fmt.Print("Histogram of value sizes (in bytes)\n")
	sizes.valueSizeHistogram.printHistogram()
}
// histogramData holds the running state of a single histogram: the bin
// boundaries, the number of samples that fell into each bin, and summary
// statistics over every recorded sample.
type histogramData struct {
	// bins lists the exclusive upper bound of each bounded bin, in
	// ascending order.
	bins []int64
	// countPerBin holds one counter per entry in bins, followed by one extra
	// counter for samples that are not below any bound.
	countPerBin []int64
	// totalCount is the number of samples recorded so far.
	totalCount int64
	// min and max are the smallest and largest samples recorded so far.
	min, max int64
	// sum is the total of all recorded samples.
	sum int64
}
// sizeHistogram pairs the histogram of key sizes with the histogram of value
// sizes.
type sizeHistogram struct {
	// keySizeHistogram tracks the distribution of key sizes.
	keySizeHistogram histogramData
	// valueSizeHistogram tracks the distribution of value sizes.
	valueSizeHistogram histogramData
}
// newSizeHistogram returns a sizeHistogram whose key and value histograms are
// ready to receive samples. Key sizes use bins 2^1..2^16 and value sizes use
// bins 2^1..2^30.
func newSizeHistogram() *sizeHistogram {
	// emptyHistogram builds a histogram with bins 2^1..2^maxExponent and no
	// samples recorded.
	emptyHistogram := func(maxExponent uint32) histogramData {
		bins := createHistogramBins(1, maxExponent)
		return histogramData{
			bins: bins,
			// One extra counter for samples at or above the last bound.
			countPerBin: make([]int64, len(bins)+1),
			// Start min/max at opposite extremes so that the first
			// recorded sample replaces both.
			min: math.MaxInt64,
			max: math.MinInt64,
		}
	}

	// TODO(ibrahim): find appropriate bin size.
	return &sizeHistogram{
		keySizeHistogram:   emptyHistogram(16),
		valueSizeHistogram: emptyHistogram(30),
	}
}
// createHistogramBins returns histogram bin bounds as consecutive powers of
// two, [2^minExponent, ..., 2^maxExponent], in ascending order.
//
// Exponents above 62 are ignored: 2^63 and larger do not fit in an int64, so
// shifting that far would yield negative or zero bounds and the result would
// no longer be ascending. A nil slice is returned when the resulting range is
// empty, i.e. when minExponent is greater than the (clamped) maxExponent.
func createHistogramBins(minExponent, maxExponent uint32) []int64 {
	// Largest exponent whose power of two is still a positive int64.
	const maxInt64Exponent = 62

	// Clamping also keeps the loop counter from wrapping around when
	// maxExponent is the largest uint32, which would otherwise never
	// terminate.
	if maxExponent > maxInt64Exponent {
		maxExponent = maxInt64Exponent
	}
	if minExponent > maxExponent {
		return nil
	}

	bins := make([]int64, 0, maxExponent-minExponent+1)
	for i := minExponent; i <= maxExponent; i++ {
		bins = append(bins, int64(1)<<i)
	}
	return bins
}
// Update records one sample in the histogram. It bumps totalCount, adds value
// to sum, widens min/max when value falls outside the current range, and
// increments the counter of the first bin whose bound is greater than value.
// Samples that are not below any bound are counted in the extra counter that
// follows the bounded bins in countPerBin.
func (histogram *histogramData) Update(value int64) {
	histogram.totalCount++
	histogram.sum += value
	if histogram.max < value {
		histogram.max = value
	}
	if histogram.min > value {
		histogram.min = value
	}

	// Assume the overflow bucket until a bounded bin claims the sample.
	target := len(histogram.bins)
	for i, upperBound := range histogram.bins {
		if value < upperBound {
			target = i
			break
		}
	}
	histogram.countPerBin[target]++
}
// buildHistogram walks the keys that start with keyPrefix and returns the
// histograms of their key sizes and value sizes. The scan runs inside a
// transaction obtained from db.NewTransaction(false); both the iterator and
// the transaction are released before the function returns.
func (db *DB) buildHistogram(keyPrefix []byte) *sizeHistogram {
	txn := db.NewTransaction(false)
	defer txn.Discard()

	itr := txn.NewIterator(DefaultIteratorOptions)
	defer itr.Close()

	result := newSizeHistogram()
	keySizes := &result.keySizeHistogram
	valueSizes := &result.valueSizeHistogram

	// Record the key size and value size of every item under the prefix.
	itr.Seek(keyPrefix)
	for itr.ValidForPrefix(keyPrefix) {
		item := itr.Item()
		keySizes.Update(item.KeySize())
		valueSizes.Update(item.ValueSize())
		itr.Next()
	}
	return result
}
// printHistogram writes the histogram to standard output in a human-readable
// form: the total sample count, the min, max and mean, and then one
// "[lower, upper) count" row for every bin that holds at least one sample.
//
// When no samples have been recorded only the total count is printed. The min
// and max fields would still hold whatever they were initialized with
// (newSizeHistogram seeds them with the int64 extremes) and the mean would be
// 0/0, so none of them is meaningful.
func (histogram histogramData) printHistogram() {
	fmt.Printf("Total count: %d\n", histogram.totalCount)
	if histogram.totalCount == 0 {
		fmt.Println()
		return
	}

	fmt.Printf("Min value: %d\n", histogram.min)
	fmt.Printf("Max value: %d\n", histogram.max)
	fmt.Printf("Mean: %.2f\n", float64(histogram.sum)/float64(histogram.totalCount))
	fmt.Printf("%24s %9s\n", "Range", "Count")

	numBins := len(histogram.bins)
	overflowIndex := len(histogram.countPerBin) - 1
	for index, count := range histogram.countPerBin {
		if count == 0 {
			continue
		}

		// The last counter covers everything from the last bound up to
		// infinity, so it has no upper bound to print.
		if index == overflowIndex {
			// Without any bounded bins the overflow range starts at zero.
			var lowerBound int64
			if numBins > 0 {
				lowerBound = histogram.bins[numBins-1]
			}
			fmt.Printf("[%10d, %10s) %9d\n", lowerBound, "infinity", count)
			continue
		}

		// Bounds are printed as int64 so they are never truncated on
		// platforms where int is 32 bits wide.
		var lowerBound int64
		if index > 0 {
			lowerBound = histogram.bins[index-1]
		}
		upperBound := histogram.bins[index]
		fmt.Printf("[%10d, %10d) %9d\n", lowerBound, upperBound, count)
	}
	fmt.Println()
}