blob: 37012be06db00b0c16d9dc2ead5abb9f7312d3cb [file]
package zstd
import (
"bytes"
"encoding/csv"
"fmt"
"io"
"os"
"reflect"
"strconv"
"testing"
"time"
"github.com/klauspost/compress/zip"
)
// TestSequenceDecsAdjustOffset drives sequenceDecs.adjustOffset with a table
// of arguments and starting prevOffset values, and verifies both the returned
// offset and the contents of sd.prevOffset after the call.
func TestSequenceDecsAdjustOffset(t *testing.T) {
	// outcome is what a single adjustOffset call is expected to produce.
	type outcome struct {
		offset     int
		prevOffset [3]int
	}
	// scenario bundles the call arguments, the starting state and the
	// expected outcome.
	type scenario struct {
		offset     int
		litLen     int
		offsetB    uint8
		prevOffset [3]int
		res        outcome
	}

	// Most scenarios start from the same prevOffset state.
	initial := [3]int{111, 222, 333}

	table := []scenario{
		{
			offset: 444, litLen: 0, offsetB: 42, prevOffset: initial,
			res: outcome{offset: 444, prevOffset: [3]int{444, 111, 222}},
		},
		{
			offset: 0, litLen: 1, offsetB: 0, prevOffset: initial,
			res: outcome{offset: 111, prevOffset: [3]int{111, 222, 333}},
		},
		{
			offset: -1, litLen: 0, offsetB: 0, prevOffset: initial,
			res: outcome{offset: 111, prevOffset: [3]int{111, 222, 333}},
		},
		{
			offset: 1, litLen: 1, offsetB: 0, prevOffset: initial,
			res: outcome{offset: 222, prevOffset: [3]int{222, 111, 333}},
		},
		{
			offset: 2, litLen: 1, offsetB: 0, prevOffset: initial,
			res: outcome{offset: 333, prevOffset: [3]int{333, 111, 222}},
		},
		{
			offset: 3, litLen: 1, offsetB: 0, prevOffset: initial,
			res: outcome{
				offset:     110, // s.prevOffset[0] - 1
				prevOffset: [3]int{110, 111, 222},
			},
		},
		{
			offset: 3, litLen: 1, offsetB: 0, prevOffset: [3]int{1, 222, 333},
			res: outcome{offset: 1, prevOffset: [3]int{1, 1, 222}},
		},
	}

	for idx, sc := range table {
		// given
		var sd sequenceDecs
		sd.prevOffset = sc.prevOffset

		// when
		got := sd.adjustOffset(sc.offset, sc.litLen, sc.offsetB)

		// then
		if got != sc.res.offset {
			t.Logf("result: %d", got)
			t.Logf("expected: %d", sc.res.offset)
			t.Errorf("testcase #%d: wrong function result", idx)
		}
		// Arrays compare element-wise, so one mismatch is reported once.
		if sd.prevOffset != sc.res.prevOffset {
			t.Logf("result: %v", sd.prevOffset)
			t.Logf("expected: %v", sc.res.prevOffset)
			t.Errorf("testcase #%d: sd.prevOffset got wrongly updated", idx)
		}
	}
}
// testSequence holds the parameters encoded in the file name of a test block.
type testSequence struct {
	n           int    // value following "n-" in the file name
	lits        int    // value following "lits-" in the file name
	win         int    // value following "win-" in the file name
	prevOffsets [3]int // the three values following "prev-" in the file name
}

// parse fills s from a file name of the form
// "n-<n>-lits-<lits>-prev-<p0>-<p1>-<p2>-win-<win>.blk".
// It returns true when all six numbers were scanned without error; otherwise
// it prints a diagnostic to standard output and returns false.
func (s *testSequence) parse(fn string) (ok bool) {
	const layout = "n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk"
	targets := []any{
		&s.n,
		&s.lits,
		&s.prevOffsets[0],
		&s.prevOffsets[1],
		&s.prevOffsets[2],
		&s.win,
	}

	scanned, err := fmt.Sscanf(fn, layout, targets...)
	if err == nil && scanned == 6 {
		return true
	}
	fmt.Println("Unable to parse:", err, scanned)
	return false
}
// readDecoders builds a sequenceDecs for the block described by ref.
//
// Three FSE decoders are loaded from buf with mustReadFrom, in the order
// literal lengths, match lengths, offsets. The returned value carries ref's
// previous offsets, sequence count and window size, a literals slice of
// length ref.lits with compressedBlockOverAlloc extra capacity, and a fresh
// bitReader. tb is currently unused.
func readDecoders(tb testing.TB, buf *bytes.Buffer, ref testSequence) sequenceDecs {
	litFSE := &fseDecoder{}
	matchFSE := &fseDecoder{}
	offFSE := &fseDecoder{}

	// The read order matters: all three consume from the same buffer.
	litFSE.mustReadFrom(buf)
	matchFSE.mustReadFrom(buf)
	offFSE.mustReadFrom(buf)

	// Fields not listed here keep their zero value.
	return sequenceDecs{
		litLengths:   sequenceDec{fse: litFSE},
		offsets:      sequenceDec{fse: offFSE},
		matchLengths: sequenceDec{fse: matchFSE},
		prevOffset:   ref.prevOffsets,
		literals:     make([]byte, ref.lits, ref.lits+compressedBlockOverAlloc),
		nSeqs:        ref.n,
		br:           &bitReader{},
		windowSize:   ref.win,
		maxBits:      litFSE.maxBits + offFSE.maxBits + matchFSE.maxBits,
	}
}
// Test_seqdec_decode_regression opens every entry of
// testdata/decode-regression.zip and, in a subtest per entry, creates a
// Decoder with a concurrency of 4 on top of it.
func Test_seqdec_decode_regression(t *testing.T) {
	zr := testCreateZipReader("testdata/decode-regression.zip", t)

	for _, tt := range zr.File {
		t.Run(tt.Name, func(t *testing.T) {
			f, err := tt.Open()
			if err != nil {
				t.Error(err)
				return
			}
			defer f.Close()

			// Note: make sure we create stream reader
			dec, err := NewReader(f, WithDecoderConcurrency(4))
			if err != nil {
				t.Error(err)
				return
			}
			// Release the decoder when the subtest ends. Deferred calls run
			// in reverse order, so the decoder is closed before f.
			defer dec.Close()

			// NOTE(review): buf is nil, so io.ReadFull is asked for zero
			// bytes and returns without ever calling dec.Read. Whether a
			// full read of these inputs is expected to succeed cannot be
			// told from here, so the call is left as it was — confirm the
			// intent before changing it to drain the stream.
			var buf []byte
			_, err = io.ReadFull(dec, buf)
			if err != nil {
				t.Error(err)
				return
			}
		})
	}
}
// Test_seqdec_decoder decodes the sequences of every block in
// testdata/seqs.zip and compares them, together with the resulting
// s.prevOffset, against the CSV entries of testdata/seqs-want.zip.
//
// Each CSV entry starts with one record holding the expected prevOffset
// values, followed by one "mo,ml,ll" record per sequence. Flipping writeWant
// to true regenerates testdata/seqs-want.zip from the decoder output instead
// of comparing.
//
// tb.Skip stops the whole test at the first entry whose name does not parse.
func Test_seqdec_decoder(t *testing.T) {
	const writeWant = false
	var buf bytes.Buffer
	zw := zip.NewWriter(&buf)

	// atoi3 converts the first three fields of a CSV record to integers and
	// reports short records or non-numeric fields instead of yielding zeros.
	atoi3 := func(rec []string) (v [3]int, err error) {
		if len(rec) < len(v) {
			return v, fmt.Errorf("want %d fields, got %d", len(v), len(rec))
		}
		for i := range v {
			if v[i], err = strconv.Atoi(rec[i]); err != nil {
				return v, err
			}
		}
		return v, nil
	}

	want := map[string][]seqVals{}
	var wantOffsets = map[string][3]int{}
	if !writeWant {
		zr := testCreateZipReader("testdata/seqs-want.zip", t)
		tb := t
		for _, tt := range zr.File {
			var ref testSequence
			if !ref.parse(tt.Name) {
				tb.Skip("unable to parse:", tt.Name)
			}
			o, err := tt.Open()
			if err != nil {
				t.Fatal(err)
			}
			recs, err := csv.NewReader(o).ReadAll()
			// Close before checking err so the entry is released on the
			// failure path too.
			_ = o.Close()
			if err != nil {
				t.Fatal(err)
			}
			for i, rec := range recs {
				v, err := atoi3(rec)
				if err != nil {
					t.Fatalf("%s: record %d: %v", tt.Name, i, err)
				}
				if i == 0 {
					// First record: expected s.prevOffset after decoding.
					wantOffsets[tt.Name] = v
					continue
				}
				want[tt.Name] = append(want[tt.Name], seqVals{mo: v[0], ml: v[1], ll: v[2]})
			}
		}
	}

	zr := testCreateZipReader("testdata/seqs.zip", t)
	tb := t
	for _, tt := range zr.File {
		var ref testSequence
		if !ref.parse(tt.Name) {
			tb.Skip("unable to parse:", tt.Name)
		}
		r, err := tt.Open()
		if err != nil {
			tb.Error(err)
			return
		}
		seqData, err := io.ReadAll(r)
		// Close right away: a defer inside the loop would keep every entry
		// open until the test returns. The data has been read (or the read
		// failed), so a close error carries no extra information.
		_ = r.Close()
		if err != nil {
			tb.Error(err)
			return
		}
		var in = bytes.NewBuffer(seqData)
		s := readDecoders(tb, in, ref)
		seqs := make([]seqVals, ref.n)

		t.Run(tt.Name, func(t *testing.T) {
			fatalIf := func(err error) {
				if err != nil {
					t.Fatal(err)
				}
			}
			// What is left in the buffer after the tables is the bitstream.
			fatalIf(s.br.init(in.Bytes()))
			fatalIf(s.litLengths.init(s.br))
			fatalIf(s.offsets.init(s.br))
			fatalIf(s.matchLengths.init(s.br))

			err := s.decode(seqs)
			if err != nil {
				t.Error(err)
			}

			if writeWant {
				w, err := zw.Create(tt.Name)
				fatalIf(err)
				c := csv.NewWriter(w)
				// The header goes straight to w; c has written nothing yet,
				// so the ordering in the entry is preserved.
				_, err = w.Write(fmt.Appendf(nil, "%d,%d,%d\n", s.prevOffset[0], s.prevOffset[1], s.prevOffset[2]))
				fatalIf(err)
				for _, seq := range seqs {
					fatalIf(c.Write([]string{strconv.Itoa(seq.mo), strconv.Itoa(seq.ml), strconv.Itoa(seq.ll)}))
				}
				c.Flush()
				fatalIf(c.Error())
				return
			}

			if s.prevOffset != wantOffsets[tt.Name] {
				t.Errorf("want offsets %v, got %v", wantOffsets[tt.Name], s.prevOffset)
			}
			if !reflect.DeepEqual(want[tt.Name], seqs) {
				t.Errorf("got %v\nwant %v", seqs, want[tt.Name])
			}
		})
	}

	if writeWant {
		if err := zw.Close(); err != nil {
			t.Fatal(err)
		}
		if err := os.WriteFile("testdata/seqs-want.zip", buf.Bytes(), os.ModePerm); err != nil {
			t.Fatal(err)
		}
	}
}
// Test_seqdec_execute decodes the sequences of every block in
// testdata/seqs.zip and then, in a subtest per block, runs s.execute on them
// with a zeroed history of ref.win bytes, checking that the output length
// equals s.seqSize.
//
// tb.Skip stops the whole test at the first entry whose name does not parse.
func Test_seqdec_execute(t *testing.T) {
	zr := testCreateZipReader("testdata/seqs.zip", t)
	tb := t
	for _, tt := range zr.File {
		var ref testSequence
		if !ref.parse(tt.Name) {
			tb.Skip("unable to parse:", tt.Name)
		}
		r, err := tt.Open()
		if err != nil {
			tb.Error(err)
			return
		}
		seqData, err := io.ReadAll(r)
		// Close right away: a defer inside the loop would keep every entry
		// open until the test returns. The data has been read (or the read
		// failed), so a close error carries no extra information.
		_ = r.Close()
		if err != nil {
			tb.Error(err)
			return
		}
		var buf = bytes.NewBuffer(seqData)
		s := readDecoders(tb, buf, ref)
		seqs := make([]seqVals, ref.n)

		// Setup runs outside the subtest, so failures go to the parent.
		fatalIf := func(err error) {
			if err != nil {
				tb.Fatal(err)
			}
		}
		fatalIf(s.br.init(buf.Bytes()))
		fatalIf(s.litLengths.init(s.br))
		fatalIf(s.offsets.init(s.br))
		fatalIf(s.matchLengths.init(s.br))

		fatalIf(s.decode(seqs))
		hist := make([]byte, ref.win)
		lits := s.literals

		t.Run(tt.Name, func(t *testing.T) {
			// Restore the literals and empty the output before executing.
			s.literals = lits
			if len(s.out) > 0 {
				s.out = s.out[:0]
			}
			err := s.execute(seqs, hist)
			if err != nil {
				t.Fatal(err)
			}
			if len(s.out) != s.seqSize {
				t.Errorf("want %d != got %d", s.seqSize, len(s.out))
			}
		})
	}
}
// Test_seqdec_decodeSync runs s.decodeSync, in a subtest per block of
// testdata/seqs.zip, with a zeroed history of ref.win bytes and fails the
// subtest if it returns an error.
//
// tb.Skip stops the whole test at the first entry whose name does not parse.
func Test_seqdec_decodeSync(t *testing.T) {
	zr := testCreateZipReader("testdata/seqs.zip", t)
	tb := t
	for _, tt := range zr.File {
		var ref testSequence
		if !ref.parse(tt.Name) {
			tb.Skip("unable to parse:", tt.Name)
		}
		r, err := tt.Open()
		if err != nil {
			tb.Error(err)
			return
		}
		seqData, err := io.ReadAll(r)
		// Close right away: a defer inside the loop would keep every entry
		// open until the test returns. The data has been read (or the read
		// failed), so a close error carries no extra information.
		_ = r.Close()
		if err != nil {
			tb.Error(err)
			return
		}
		var buf = bytes.NewBuffer(seqData)
		s := readDecoders(tb, buf, ref)

		lits := s.literals
		hist := make([]byte, ref.win)
		t.Run(tt.Name, func(t *testing.T) {
			fatalIf := func(err error) {
				if err != nil {
					t.Fatal(err)
				}
			}
			fatalIf(s.br.init(buf.Bytes()))
			fatalIf(s.litLengths.init(s.br))
			fatalIf(s.offsets.init(s.br))
			fatalIf(s.matchLengths.init(s.br))

			// Restore the literals and empty the output before decoding.
			s.literals = lits
			if len(s.out) > 0 {
				s.out = s.out[:0]
			}
			err := s.decodeSync(hist)
			if err != nil {
				t.Fatal(err)
			}
		})
	}
}
// Benchmark_seqdec_decode is the benchmark entry point; it does nothing but
// forward b to benchmark_seqdec_decode.
func Benchmark_seqdec_decode(b *testing.B) {
benchmark_seqdec_decode(b)
}
// benchmark_seqdec_decode measures s.decode for every block in
// testdata/seqs.zip, one sub-benchmark per block. Every iteration
// re-initialises the bit reader and the three sequence decoders before
// decoding.
//
// Besides the standard metrics it reports "seq/s" (decoded sequences per
// wall-clock second) and "b/seq" (the value of s.br.remain() taken before
// decoding, divided by s.nSeqs).
//
// tb.Skip stops the whole benchmark at the first entry whose name does not
// parse.
func benchmark_seqdec_decode(b *testing.B) {
	zr := testCreateZipReader("testdata/seqs.zip", b)
	tb := b
	for _, tt := range zr.File {
		var ref testSequence
		if !ref.parse(tt.Name) {
			tb.Skip("unable to parse:", tt.Name)
		}
		r, err := tt.Open()
		if err != nil {
			tb.Error(err)
			return
		}
		seqData, err := io.ReadAll(r)
		// Close right away: a defer inside the loop would keep every entry
		// open until the benchmark returns. The data has been read (or the
		// read failed), so a close error carries no extra information.
		_ = r.Close()
		if err != nil {
			tb.Error(err)
			return
		}
		var buf = bytes.NewBuffer(seqData)
		s := readDecoders(tb, buf, ref)
		seqs := make([]seqVals, ref.n)

		b.Run(tt.Name, func(b *testing.B) {
			fatalIf := func(err error) {
				if err != nil {
					b.Fatal(err)
				}
			}
			b.ReportAllocs()
			b.ResetTimer()
			t := time.Now()
			decoded := 0
			remain := uint(0)
			for i := 0; i < b.N; i++ {
				fatalIf(s.br.init(buf.Bytes()))
				fatalIf(s.litLengths.init(s.br))
				fatalIf(s.offsets.init(s.br))
				fatalIf(s.matchLengths.init(s.br))
				remain = s.br.remain()
				err := s.decode(seqs)
				if err != nil {
					b.Fatal(err)
				}
				decoded += ref.n
			}
			b.ReportMetric(float64(decoded)/time.Since(t).Seconds(), "seq/s")
			b.ReportMetric(float64(remain)/float64(s.nSeqs), "b/seq")
		})
	}
}
// Benchmark_seqdec_execute measures s.execute for every block in
// testdata/seqs.zip, one sub-benchmark per block. The sequences are decoded
// once during setup; each iteration restores the literals, empties s.out and
// executes the sequences against a zeroed history of ref.win bytes.
//
// Throughput is based on s.seqSize bytes per iteration, and "seq/s" reports
// executed sequences per wall-clock second.
//
// tb.Skip stops the whole benchmark at the first entry whose name does not
// parse.
func Benchmark_seqdec_execute(b *testing.B) {
	zr := testCreateZipReader("testdata/seqs.zip", b)
	tb := b
	for _, tt := range zr.File {
		var ref testSequence
		if !ref.parse(tt.Name) {
			tb.Skip("unable to parse:", tt.Name)
		}
		r, err := tt.Open()
		if err != nil {
			tb.Error(err)
			return
		}
		seqData, err := io.ReadAll(r)
		// Close right away: a defer inside the loop would keep every entry
		// open until the benchmark returns. The data has been read (or the
		// read failed), so a close error carries no extra information.
		_ = r.Close()
		if err != nil {
			tb.Error(err)
			return
		}
		var buf = bytes.NewBuffer(seqData)
		s := readDecoders(tb, buf, ref)
		seqs := make([]seqVals, ref.n)

		// fatalIf reports through the parent benchmark and is therefore only
		// used for the setup below, never inside the sub-benchmark.
		fatalIf := func(err error) {
			if err != nil {
				b.Fatal(err)
			}
		}
		fatalIf(s.br.init(buf.Bytes()))
		fatalIf(s.litLengths.init(s.br))
		fatalIf(s.offsets.init(s.br))
		fatalIf(s.matchLengths.init(s.br))

		fatalIf(s.decode(seqs))
		hist := make([]byte, ref.win)
		lits := s.literals

		b.Run(tt.Name, func(b *testing.B) {
			b.ReportAllocs()
			b.SetBytes(int64(s.seqSize))
			b.ResetTimer()
			t := time.Now()
			decoded := 0
			for i := 0; i < b.N; i++ {
				s.literals = lits
				if len(s.out) > 0 {
					s.out = s.out[:0]
				}
				// Fail the sub-benchmark's own b: Fatal has to be called on
				// the benchmark whose goroutine is running this code.
				if err := s.execute(seqs, hist); err != nil {
					b.Fatal(err)
				}
				decoded += ref.n
			}
			b.ReportMetric(float64(decoded)/time.Since(t).Seconds(), "seq/s")
		})
	}
}
// Benchmark_seqdec_decodeSync measures s.decodeSync for every block in
// testdata/seqs.zip, one sub-benchmark per block. Every iteration
// re-initialises the bit reader and the three sequence decoders, restores the
// literals, empties s.out and decodes against a zeroed history of ref.win
// bytes.
//
// Throughput is based on len(s.out) after decoding, and "seq/s" reports
// decoded sequences per wall-clock second.
//
// tb.Skip stops the whole benchmark at the first entry whose name does not
// parse.
func Benchmark_seqdec_decodeSync(b *testing.B) {
	zr := testCreateZipReader("testdata/seqs.zip", b)
	tb := b
	for _, tt := range zr.File {
		var ref testSequence
		if !ref.parse(tt.Name) {
			tb.Skip("unable to parse:", tt.Name)
		}
		r, err := tt.Open()
		if err != nil {
			tb.Error(err)
			return
		}
		seqData, err := io.ReadAll(r)
		// Close right away: a defer inside the loop would keep every entry
		// open until the benchmark returns. The data has been read (or the
		// read failed), so a close error carries no extra information.
		_ = r.Close()
		if err != nil {
			tb.Error(err)
			return
		}
		var buf = bytes.NewBuffer(seqData)
		s := readDecoders(tb, buf, ref)

		lits := s.literals
		hist := make([]byte, ref.win)
		b.Run(tt.Name, func(b *testing.B) {
			fatalIf := func(err error) {
				if err != nil {
					b.Fatal(err)
				}
			}
			decoded := 0
			b.ReportAllocs()
			b.ResetTimer()
			t := time.Now()
			for i := 0; i < b.N; i++ {
				fatalIf(s.br.init(buf.Bytes()))
				fatalIf(s.litLengths.init(s.br))
				fatalIf(s.offsets.init(s.br))
				fatalIf(s.matchLengths.init(s.br))

				s.literals = lits
				if len(s.out) > 0 {
					s.out = s.out[:0]
				}
				err := s.decodeSync(hist)
				if err != nil {
					b.Fatal(err)
				}
				b.SetBytes(int64(len(s.out)))
				decoded += ref.n
			}
			b.ReportMetric(float64(decoded)/time.Since(t).Seconds(), "seq/s")
		})
	}
}
// testCreateZipReader loads the file at path fully into memory and opens it
// as a zip archive. Any error while reading the file or opening the archive
// is reported with tb.Fatal.
func testCreateZipReader(path string, tb testing.TB) *zip.Reader {
	raw, err := os.ReadFile(path)
	if err != nil {
		tb.Fatal(err)
	}

	archive, err := zip.NewReader(bytes.NewReader(raw), int64(len(raw)))
	if err != nil {
		tb.Fatal(err)
	}
	return archive
}