| package zstd |
| |
| import ( |
| "bytes" |
| "encoding/csv" |
| "fmt" |
| "io" |
| "os" |
| "reflect" |
| "strconv" |
| "testing" |
| "time" |
| |
| "github.com/klauspost/compress/zip" |
| ) |
| |
| func TestSequenceDecsAdjustOffset(t *testing.T) { |
| type result struct { |
| offset int |
| prevOffset [3]int |
| } |
| |
| tc := []struct { |
| offset int |
| litLen int |
| offsetB uint8 |
| prevOffset [3]int |
| |
| res result |
| }{{ |
| offset: 444, |
| litLen: 0, |
| offsetB: 42, |
| prevOffset: [3]int{111, 222, 333}, |
| |
| res: result{ |
| offset: 444, |
| prevOffset: [3]int{444, 111, 222}, |
| }, |
| }, { |
| offset: 0, |
| litLen: 1, |
| offsetB: 0, |
| prevOffset: [3]int{111, 222, 333}, |
| |
| res: result{ |
| offset: 111, |
| prevOffset: [3]int{111, 222, 333}, |
| }, |
| }, { |
| offset: -1, |
| litLen: 0, |
| offsetB: 0, |
| prevOffset: [3]int{111, 222, 333}, |
| |
| res: result{ |
| offset: 111, |
| prevOffset: [3]int{111, 222, 333}, |
| }, |
| }, { |
| offset: 1, |
| litLen: 1, |
| offsetB: 0, |
| prevOffset: [3]int{111, 222, 333}, |
| |
| res: result{ |
| offset: 222, |
| prevOffset: [3]int{222, 111, 333}, |
| }, |
| }, { |
| offset: 2, |
| litLen: 1, |
| offsetB: 0, |
| prevOffset: [3]int{111, 222, 333}, |
| |
| res: result{ |
| offset: 333, |
| prevOffset: [3]int{333, 111, 222}, |
| }, |
| }, { |
| offset: 3, |
| litLen: 1, |
| offsetB: 0, |
| prevOffset: [3]int{111, 222, 333}, |
| |
| res: result{ |
| offset: 110, // s.prevOffset[0] - 1 |
| prevOffset: [3]int{110, 111, 222}, |
| }, |
| }, { |
| offset: 3, |
| litLen: 1, |
| offsetB: 0, |
| prevOffset: [3]int{1, 222, 333}, |
| |
| res: result{ |
| offset: 1, |
| prevOffset: [3]int{1, 1, 222}, |
| }, |
| }, |
| } |
| |
| for i := range tc { |
| // given |
| var sd sequenceDecs |
| for j := range 3 { |
| sd.prevOffset[j] = tc[i].prevOffset[j] |
| } |
| |
| // when |
| offset := sd.adjustOffset(tc[i].offset, tc[i].litLen, tc[i].offsetB) |
| |
| // then |
| if offset != tc[i].res.offset { |
| t.Logf("result: %d", offset) |
| t.Logf("expected: %d", tc[i].res.offset) |
| t.Errorf("testcase #%d: wrong function result", i) |
| } |
| |
| for j := range 3 { |
| if sd.prevOffset[j] != tc[i].res.prevOffset[j] { |
| t.Logf("result: %v", sd.prevOffset) |
| t.Logf("expected: %v", tc[i].res.prevOffset) |
| t.Errorf("testcase #%d: sd.prevOffset got wrongly updated", i) |
| break |
| } |
| } |
| } |
| } |
| |
// testSequence holds the values encoded in the file name of a sequence
// test-data entry: the sequence count, the literal count, the window size and
// the three previous offsets.
type testSequence struct {
	n, lits, win int
	prevOffsets  [3]int
}

// parse fills s from a file name shaped like
// "n-<n>-lits-<lits>-prev-<p0>-<p1>-<p2>-win-<win>.blk".
//
// It returns true only when all six integers were scanned without error.
// Otherwise it prints the scan error and the number of values read to
// standard output and returns false.
func (s *testSequence) parse(fn string) (ok bool) {
	const layout = "n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk"

	scanned, err := fmt.Sscanf(fn, layout,
		&s.n, &s.lits,
		&s.prevOffsets[0], &s.prevOffsets[1], &s.prevOffsets[2],
		&s.win,
	)
	if err == nil && scanned == 6 {
		return true
	}

	fmt.Println("Unable to parse:", err, scanned)
	return false
}
| |
| func readDecoders(tb testing.TB, buf *bytes.Buffer, ref testSequence) sequenceDecs { |
| s := sequenceDecs{ |
| litLengths: sequenceDec{fse: &fseDecoder{}}, |
| offsets: sequenceDec{fse: &fseDecoder{}}, |
| matchLengths: sequenceDec{fse: &fseDecoder{}}, |
| prevOffset: ref.prevOffsets, |
| dict: nil, |
| literals: make([]byte, ref.lits, ref.lits+compressedBlockOverAlloc), |
| out: nil, |
| nSeqs: ref.n, |
| br: nil, |
| seqSize: 0, |
| windowSize: ref.win, |
| maxBits: 0, |
| } |
| |
| s.litLengths.fse.mustReadFrom(buf) |
| s.matchLengths.fse.mustReadFrom(buf) |
| s.offsets.fse.mustReadFrom(buf) |
| |
| s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits |
| s.br = &bitReader{} |
| return s |
| } |
| |
| func Test_seqdec_decode_regression(t *testing.T) { |
| zr := testCreateZipReader("testdata/decode-regression.zip", t) |
| |
| for _, tt := range zr.File { |
| t.Run(tt.Name, func(t *testing.T) { |
| f, err := tt.Open() |
| if err != nil { |
| t.Error(err) |
| return |
| } |
| defer f.Close() |
| |
| // Note: make sure we create stream reader |
| dec, err := NewReader(f, WithDecoderConcurrency(4)) |
| if err != nil { |
| t.Error(err) |
| return |
| } |
| |
| var buf []byte |
| _, err = io.ReadFull(dec, buf) |
| if err != nil { |
| t.Error(err) |
| return |
| } |
| }) |
| } |
| } |
| |
| func Test_seqdec_decoder(t *testing.T) { |
| const writeWant = false |
| var buf bytes.Buffer |
| zw := zip.NewWriter(&buf) |
| |
| want := map[string][]seqVals{} |
| var wantOffsets = map[string][3]int{} |
| if !writeWant { |
| zr := testCreateZipReader("testdata/seqs-want.zip", t) |
| tb := t |
| for _, tt := range zr.File { |
| var ref testSequence |
| if !ref.parse(tt.Name) { |
| tb.Skip("unable to parse:", tt.Name) |
| } |
| o, err := tt.Open() |
| if err != nil { |
| t.Fatal(err) |
| } |
| r := csv.NewReader(o) |
| recs, err := r.ReadAll() |
| if err != nil { |
| t.Fatal(err) |
| } |
| for i, rec := range recs { |
| if i == 0 { |
| var o [3]int |
| o[0], _ = strconv.Atoi(rec[0]) |
| o[1], _ = strconv.Atoi(rec[1]) |
| o[2], _ = strconv.Atoi(rec[2]) |
| wantOffsets[tt.Name] = o |
| continue |
| } |
| s := seqVals{} |
| s.mo, _ = strconv.Atoi(rec[0]) |
| s.ml, _ = strconv.Atoi(rec[1]) |
| s.ll, _ = strconv.Atoi(rec[2]) |
| want[tt.Name] = append(want[tt.Name], s) |
| } |
| o.Close() |
| } |
| } |
| zr := testCreateZipReader("testdata/seqs.zip", t) |
| tb := t |
| for _, tt := range zr.File { |
| var ref testSequence |
| if !ref.parse(tt.Name) { |
| tb.Skip("unable to parse:", tt.Name) |
| } |
| r, err := tt.Open() |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| |
| seqData, err := io.ReadAll(r) |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| var buf = bytes.NewBuffer(seqData) |
| s := readDecoders(tb, buf, ref) |
| seqs := make([]seqVals, ref.n) |
| |
| t.Run(tt.Name, func(t *testing.T) { |
| fatalIf := func(err error) { |
| if err != nil { |
| t.Fatal(err) |
| } |
| } |
| fatalIf(s.br.init(buf.Bytes())) |
| fatalIf(s.litLengths.init(s.br)) |
| fatalIf(s.offsets.init(s.br)) |
| fatalIf(s.matchLengths.init(s.br)) |
| |
| err := s.decode(seqs) |
| if err != nil { |
| t.Error(err) |
| } |
| if writeWant { |
| w, err := zw.Create(tt.Name) |
| fatalIf(err) |
| c := csv.NewWriter(w) |
| w.Write(fmt.Appendf(nil, "%d,%d,%d\n", s.prevOffset[0], s.prevOffset[1], s.prevOffset[2])) |
| for _, seq := range seqs { |
| c.Write([]string{strconv.Itoa(seq.mo), strconv.Itoa(seq.ml), strconv.Itoa(seq.ll)}) |
| } |
| c.Flush() |
| } else { |
| if s.prevOffset != wantOffsets[tt.Name] { |
| t.Errorf("want offsets %v, got %v", wantOffsets[tt.Name], s.prevOffset) |
| } |
| |
| if !reflect.DeepEqual(want[tt.Name], seqs) { |
| t.Errorf("got %v\nwant %v", seqs, want[tt.Name]) |
| } |
| } |
| }) |
| } |
| if writeWant { |
| zw.Close() |
| os.WriteFile("testdata/seqs-want.zip", buf.Bytes(), os.ModePerm) |
| } |
| } |
| |
| func Test_seqdec_execute(t *testing.T) { |
| zr := testCreateZipReader("testdata/seqs.zip", t) |
| tb := t |
| for _, tt := range zr.File { |
| var ref testSequence |
| if !ref.parse(tt.Name) { |
| tb.Skip("unable to parse:", tt.Name) |
| } |
| r, err := tt.Open() |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| |
| seqData, err := io.ReadAll(r) |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| var buf = bytes.NewBuffer(seqData) |
| s := readDecoders(tb, buf, ref) |
| seqs := make([]seqVals, ref.n) |
| |
| fatalIf := func(err error) { |
| if err != nil { |
| tb.Fatal(err) |
| } |
| } |
| fatalIf(s.br.init(buf.Bytes())) |
| fatalIf(s.litLengths.init(s.br)) |
| fatalIf(s.offsets.init(s.br)) |
| fatalIf(s.matchLengths.init(s.br)) |
| |
| fatalIf(s.decode(seqs)) |
| hist := make([]byte, ref.win) |
| lits := s.literals |
| |
| t.Run(tt.Name, func(t *testing.T) { |
| s.literals = lits |
| if len(s.out) > 0 { |
| s.out = s.out[:0] |
| } |
| err := s.execute(seqs, hist) |
| if err != nil { |
| t.Fatal(err) |
| } |
| if len(s.out) != s.seqSize { |
| t.Errorf("want %d != got %d", s.seqSize, len(s.out)) |
| } |
| }) |
| } |
| } |
| |
| func Test_seqdec_decodeSync(t *testing.T) { |
| zr := testCreateZipReader("testdata/seqs.zip", t) |
| tb := t |
| for _, tt := range zr.File { |
| var ref testSequence |
| if !ref.parse(tt.Name) { |
| tb.Skip("unable to parse:", tt.Name) |
| } |
| r, err := tt.Open() |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| |
| seqData, err := io.ReadAll(r) |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| var buf = bytes.NewBuffer(seqData) |
| s := readDecoders(tb, buf, ref) |
| |
| lits := s.literals |
| hist := make([]byte, ref.win) |
| t.Run(tt.Name, func(t *testing.T) { |
| fatalIf := func(err error) { |
| if err != nil { |
| t.Fatal(err) |
| } |
| } |
| fatalIf(s.br.init(buf.Bytes())) |
| fatalIf(s.litLengths.init(s.br)) |
| fatalIf(s.offsets.init(s.br)) |
| fatalIf(s.matchLengths.init(s.br)) |
| s.literals = lits |
| if len(s.out) > 0 { |
| s.out = s.out[:0] |
| } |
| err := s.decodeSync(hist) |
| if err != nil { |
| t.Fatal(err) |
| } |
| }) |
| } |
| } |
| |
| func Benchmark_seqdec_decode(b *testing.B) { |
| benchmark_seqdec_decode(b) |
| } |
| |
| func benchmark_seqdec_decode(b *testing.B) { |
| zr := testCreateZipReader("testdata/seqs.zip", b) |
| tb := b |
| for _, tt := range zr.File { |
| var ref testSequence |
| if !ref.parse(tt.Name) { |
| tb.Skip("unable to parse:", tt.Name) |
| } |
| r, err := tt.Open() |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| |
| seqData, err := io.ReadAll(r) |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| var buf = bytes.NewBuffer(seqData) |
| s := readDecoders(tb, buf, ref) |
| seqs := make([]seqVals, ref.n) |
| |
| b.Run(tt.Name, func(b *testing.B) { |
| fatalIf := func(err error) { |
| if err != nil { |
| b.Fatal(err) |
| } |
| } |
| b.ReportAllocs() |
| b.ResetTimer() |
| t := time.Now() |
| decoded := 0 |
| remain := uint(0) |
| for i := 0; i < b.N; i++ { |
| fatalIf(s.br.init(buf.Bytes())) |
| fatalIf(s.litLengths.init(s.br)) |
| fatalIf(s.offsets.init(s.br)) |
| fatalIf(s.matchLengths.init(s.br)) |
| remain = s.br.remain() |
| err := s.decode(seqs) |
| if err != nil { |
| b.Fatal(err) |
| } |
| decoded += ref.n |
| } |
| b.ReportMetric(float64(decoded)/time.Since(t).Seconds(), "seq/s") |
| b.ReportMetric(float64(remain)/float64(s.nSeqs), "b/seq") |
| }) |
| } |
| } |
| |
| func Benchmark_seqdec_execute(b *testing.B) { |
| zr := testCreateZipReader("testdata/seqs.zip", b) |
| tb := b |
| for _, tt := range zr.File { |
| var ref testSequence |
| if !ref.parse(tt.Name) { |
| tb.Skip("unable to parse:", tt.Name) |
| } |
| r, err := tt.Open() |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| |
| seqData, err := io.ReadAll(r) |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| var buf = bytes.NewBuffer(seqData) |
| s := readDecoders(tb, buf, ref) |
| seqs := make([]seqVals, ref.n) |
| |
| fatalIf := func(err error) { |
| if err != nil { |
| b.Fatal(err) |
| } |
| } |
| fatalIf(s.br.init(buf.Bytes())) |
| fatalIf(s.litLengths.init(s.br)) |
| fatalIf(s.offsets.init(s.br)) |
| fatalIf(s.matchLengths.init(s.br)) |
| |
| fatalIf(s.decode(seqs)) |
| hist := make([]byte, ref.win) |
| lits := s.literals |
| |
| b.Run(tt.Name, func(b *testing.B) { |
| b.ReportAllocs() |
| b.SetBytes(int64(s.seqSize)) |
| b.ResetTimer() |
| t := time.Now() |
| decoded := 0 |
| for i := 0; i < b.N; i++ { |
| s.literals = lits |
| if len(s.out) > 0 { |
| s.out = s.out[:0] |
| } |
| fatalIf(s.execute(seqs, hist)) |
| decoded += ref.n |
| } |
| b.ReportMetric(float64(decoded)/time.Since(t).Seconds(), "seq/s") |
| }) |
| } |
| } |
| |
| func Benchmark_seqdec_decodeSync(b *testing.B) { |
| zr := testCreateZipReader("testdata/seqs.zip", b) |
| tb := b |
| for _, tt := range zr.File { |
| var ref testSequence |
| if !ref.parse(tt.Name) { |
| tb.Skip("unable to parse:", tt.Name) |
| } |
| r, err := tt.Open() |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| |
| seqData, err := io.ReadAll(r) |
| if err != nil { |
| tb.Error(err) |
| return |
| } |
| var buf = bytes.NewBuffer(seqData) |
| s := readDecoders(tb, buf, ref) |
| |
| lits := s.literals |
| hist := make([]byte, ref.win) |
| b.Run(tt.Name, func(b *testing.B) { |
| fatalIf := func(err error) { |
| if err != nil { |
| b.Fatal(err) |
| } |
| } |
| decoded := 0 |
| b.ReportAllocs() |
| b.ResetTimer() |
| t := time.Now() |
| |
| for i := 0; i < b.N; i++ { |
| fatalIf(s.br.init(buf.Bytes())) |
| fatalIf(s.litLengths.init(s.br)) |
| fatalIf(s.offsets.init(s.br)) |
| fatalIf(s.matchLengths.init(s.br)) |
| s.literals = lits |
| if len(s.out) > 0 { |
| s.out = s.out[:0] |
| } |
| err := s.decodeSync(hist) |
| if err != nil { |
| b.Fatal(err) |
| } |
| b.SetBytes(int64(len(s.out))) |
| decoded += ref.n |
| } |
| b.ReportMetric(float64(decoded)/time.Since(t).Seconds(), "seq/s") |
| }) |
| } |
| } |
| |
| func testCreateZipReader(path string, tb testing.TB) *zip.Reader { |
| failOnError := func(err error) { |
| if err != nil { |
| tb.Fatal(err) |
| } |
| } |
| |
| data, err := os.ReadFile(path) |
| failOnError(err) |
| |
| zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) |
| failOnError(err) |
| |
| return zr |
| } |