s2/encode_best.go - external/github.com/klauspost/compress - Git at Google

 // Copyright 2016 The Snappy-Go Authors. All rights reserved.
 // Copyright (c) 2019 Klaus Post. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package s2

 import (
 	"fmt"
 	"math"
 	"math/bits"
 )

 // encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It
 // assumes that the varint-encoded length of the decompressed bytes has already
 // been written.
 //
 // It also assumes that:
 //
 //	len(dst) >= MaxEncodedLen(len(src)) &&
 //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
 func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
 	// Initialize the hash tables.
 	const (
 		// Long hash matches.
 		lTableBits    = bestLongTableBits
 		maxLTableSize = bestLongTableSize

 		// Short hash matches.
 		sTableBits    = bestShortTableBits
 		maxSTableSize = bestShortTableSize

 		inputMargin = 8 + 2

 		debug = false
 	)

 	// sLimit is when to stop looking for offset/length copies. The inputMargin
 	// lets us use a fast path for emitLiteral in the main loop, while we are
 	// looking for copies.
 	sLimit := len(src) - inputMargin
 	if len(src) < minNonLiteralBlockSize {
 		return 0
 	}
 	sLimitDict := min(len(src)-inputMargin, MaxDictSrcOffset-inputMargin)

 	tbl := getBestTables()
 	lTable := &tbl.lTable
 	sTable := &tbl.sTable
 	defer bestTablePool.Put(tbl)

 	// Bail if we can't compress to at least this.
 	dstLimit := len(src) - 5

 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := 0

 	// The encoded form must start with a literal, as there are no previous
 	// bytes to copy, so we start looking for hash matches at s == 1.
 	s := 1
 	repeat := 1
 	if dict != nil {
 		dict.initBest()
 		s = 0
 		repeat = len(dict.dict) - dict.repeat
 	}
 	cv := load64(src, s)

 	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
 	const lowbitMask = 0xffffffff
 	getCur := func(x uint64) int {
 		return int(x & lowbitMask)
 	}
 	getPrev := func(x uint64) int {
 		return int(x >> 32)
 	}
 	const maxSkip = 64

 	for {
 		type match struct {
 			offset    int
 			s         int
 			length    int
 			score     int
 			rep, dict bool
 		}
 		var best match
 		for {
 			// Next src position to check
 			nextS := (s-nextEmit)>>8 + 1
 			if nextS > maxSkip {
 				nextS = s + maxSkip
 			} else {
 				nextS += s
 			}
 			if nextS > sLimit {
 				goto emitRemainder
 			}
 			if dict != nil && s >= MaxDictSrcOffset {
 				dict = nil
 				if repeat > s {
 					repeat = math.MinInt32
 				}
 			}
 			hashL := hash8(cv, lTableBits)
 			hashS := hash4(cv, sTableBits)
 			candidateL := lTable[hashL]
 			candidateS := sTable[hashS]

 			score := func(m match) int {
 				// Matches that are longer forward are penalized since we must emit it as a literal.
 				score := m.length - m.s
 				if nextEmit == m.s {
 					// If we do not have to emit literals, we save 1 byte
 					score++
 				}
 				offset := m.s - m.offset
 				if m.rep {
 					return score - emitRepeatSize(offset, m.length)
 				}
 				return score - emitCopySize(offset, m.length)
 			}

 			matchAt := func(offset, s int, first uint32, rep bool) match {
 				if best.length != 0 && best.s-best.offset == s-offset {
 					// Don't retest if we have the same offset.
 					return match{offset: offset, s: s}
 				}
 				if load32(src, offset) != first {
 					return match{offset: offset, s: s}
 				}
 				m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
 				s += 4
 				for s < len(src) {
 					if len(src)-s < 8 {
 						if src[s] == src[m.length] {
 							m.length++
 							s++
 							continue
 						}
 						break
 					}
 					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
 						m.length += bits.TrailingZeros64(diff) >> 3
 						break
 					}
 					s += 8
 					m.length += 8
 				}
 				m.length -= offset
 				m.score = score(m)
 				if m.score <= -m.s {
 					// Eliminate if no savings, we might find a better one.
 					m.length = 0
 				}
 				return m
 			}
 			matchDict := func(candidate, s int, first uint32, rep bool) match {
 				if s >= MaxDictSrcOffset {
 					return match{offset: candidate, s: s}
 				}
 				// Calculate offset as if in continuous array with s
 				offset := -len(dict.dict) + candidate
 				if best.length != 0 && best.s-best.offset == s-offset && !rep {
 					// Don't retest if we have the same offset.
 					return match{offset: offset, s: s}
 				}

 				if load32(dict.dict, candidate) != first {
 					return match{offset: offset, s: s}
 				}
 				m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true}
 				s += 4
 				if !rep {
 					for s < sLimitDict && m.length < len(dict.dict) {
 						if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
 							if src[s] == dict.dict[m.length] {
 								m.length++
 								s++
 								continue
 							}
 							break
 						}
 						if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
 							m.length += bits.TrailingZeros64(diff) >> 3
 							break
 						}
 						s += 8
 						m.length += 8
 					}
 				} else {
 					for s < len(src) && m.length < len(dict.dict) {
 						if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
 							if src[s] == dict.dict[m.length] {
 								m.length++
 								s++
 								continue
 							}
 							break
 						}
 						if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
 							m.length += bits.TrailingZeros64(diff) >> 3
 							break
 						}
 						s += 8
 						m.length += 8
 					}
 				}
 				m.length -= candidate
 				m.score = score(m)
 				if m.score <= -m.s {
 					// Eliminate if no savings, we might find a better one.
 					m.length = 0
 				}
 				return m
 			}

 			bestOf := func(a, b match) match {
 				if b.length == 0 {
 					return a
 				}
 				if a.length == 0 {
 					return b
 				}
 				as := a.score + b.s
 				bs := b.score + a.s
 				if as >= bs {
 					return a
 				}
 				return b
 			}

 			if s > 0 {
 				best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
 				best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
 				best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))
 			}
 			if dict != nil {
 				candidateL := dict.bestTableLong[hashL]
 				candidateS := dict.bestTableShort[hashS]
 				best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
 				best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false))
 				best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
 				best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false))
 			}
 			{
 				if (dict == nil || repeat <= s) && repeat > 0 {
 					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
 				} else if s-repeat < -4 && dict != nil {
 					candidate := len(dict.dict) - (repeat - s)
 					best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
 					candidate++
 					best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true))
 				}

 				if best.length > 0 {
 					hashS := hash4(cv>>8, sTableBits)
 					// s+1
 					nextShort := sTable[hashS]
 					s := s + 1
 					cv := load64(src, s)
 					hashL := hash8(cv, lTableBits)
 					nextLong := lTable[hashL]
 					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
 					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
 					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
 					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))

 					// Dict at + 1
 					if dict != nil {
 						candidateL := dict.bestTableLong[hashL]
 						candidateS := dict.bestTableShort[hashS]

 						best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
 						best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
 					}

 					// s+2
 					if true {
 						hashS := hash4(cv>>8, sTableBits)

 						nextShort = sTable[hashS]
 						s++
 						cv = load64(src, s)
 						hashL := hash8(cv, lTableBits)
 						nextLong = lTable[hashL]

 						if (dict == nil || repeat <= s) && repeat > 0 {
 							// Repeat at + 2
 							best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true))
 						} else if repeat-s > 4 && dict != nil {
 							candidate := len(dict.dict) - (repeat - s)
 							best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
 						}
 						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
 						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
 						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
 						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))

 						// Dict at +2
 						// Very small gain
 						if dict != nil {
 							candidateL := dict.bestTableLong[hashL]
 							candidateS := dict.bestTableShort[hashS]

 							best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
 							best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
 						}
 					}
 					// Search for a match at best match end, see if that is better.
 					// Allow some bytes at the beginning to mismatch.
 					// Sweet spot is around 1-2 bytes, but depends on input.
 					// The skipped bytes are tested in Extend backwards,
 					// and still picked up as part of the match if they do.
 					const skipBeginning = 2
 					const skipEnd = 1
 					if sAt := best.s + best.length - skipEnd; sAt < sLimit {

 						sBack := best.s + skipBeginning - skipEnd
 						backL := best.length - skipBeginning
 						// Load initial values
 						cv = load64(src, sBack)

 						// Grab candidates...
 						next := lTable[hash8(load64(src, sAt), lTableBits)]

 						if checkAt := getCur(next) - backL; checkAt > 0 {
 							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
 						}
 						if checkAt := getPrev(next) - backL; checkAt > 0 {
 							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
 						}
 						// Disabled: Extremely small gain
 						if false {
 							next = sTable[hash4(load64(src, sAt), sTableBits)]
 							if checkAt := getCur(next) - backL; checkAt > 0 {
 								best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
 							}
 							if checkAt := getPrev(next) - backL; checkAt > 0 {
 								best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
 							}
 						}
 					}
 				}
 			}

 			// Update table
 			lTable[hashL] = uint64(s) | candidateL<<32
 			sTable[hashS] = uint64(s) | candidateS<<32

 			if best.length > 0 {
 				break
 			}

 			cv = load64(src, nextS)
 			s = nextS
 		}

 		// Extend backwards, not needed for repeats...
 		s = best.s
 		if !best.rep && !best.dict {
 			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
 				best.offset--
 				best.length++
 				s--
 			}
 		}
 		if false && best.offset >= s {
 			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
 		}
 		// Bail if we exceed the maximum size.
 		if d+(s-nextEmit) > dstLimit {
 			return 0
 		}

 		base := s
 		offset := s - best.offset
 		s += best.length

 		if offset > 65535 && s-base <= 5 && !best.rep {
 			// Bail if the match is equal or worse to the encoding.
 			s = best.s + 1
 			if s >= sLimit {
 				goto emitRemainder
 			}
 			cv = load64(src, s)
 			continue
 		}
 		if debug && nextEmit != base {
 			fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base)
 		}
 		d += emitLiteral(dst[d:], src[nextEmit:base])
 		if best.rep {
 			if nextEmit > 0 || best.dict {
 				if debug {
 					fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
 				}
 				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
 				d += emitRepeat(dst[d:], offset, best.length)
 			} else {
 				// First match without dict cannot be a repeat.
 				if debug {
 					fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
 				}
 				d += emitCopy(dst[d:], offset, best.length)
 			}
 		} else {
 			if debug {
 				fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
 			}
 			d += emitCopy(dst[d:], offset, best.length)
 		}
 		repeat = offset

 		nextEmit = s
 		if s >= sLimit {
 			goto emitRemainder
 		}

 		if d > dstLimit {
 			// Do we have space for more, if not bail.
 			return 0
 		}
 		// Fill tables...
 		for i := best.s + 1; i < s; i++ {
 			cv0 := load64(src, i)
 			long0 := hash8(cv0, lTableBits)
 			short0 := hash4(cv0, sTableBits)
 			lTable[long0] = uint64(i) | lTable[long0]<<32
 			sTable[short0] = uint64(i) | sTable[short0]<<32
 		}
 		cv = load64(src, s)
 	}

 emitRemainder:
 	if nextEmit < len(src) {
 		// Bail if we exceed the maximum size.
 		if d+len(src)-nextEmit > dstLimit {
 			return 0
 		}
 		if debug && nextEmit != s {
 			fmt.Println("emitted ", len(src)-nextEmit, "literals")
 		}
 		d += emitLiteral(dst[d:], src[nextEmit:])
 	}
 	return d
 }

 // encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
 // assumes that the varint-encoded length of the decompressed bytes has already
 // been written.
 //
 // It also assumes that:
 //
 //	len(dst) >= MaxEncodedLen(len(src)) &&
 //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
 func encodeBlockBestSnappy(dst, src []byte) (d int) {
 	// Initialize the hash tables.
 	const (
 		// Long hash matches.
 		lTableBits    = bestLongTableBits
 		maxLTableSize = bestLongTableSize

 		// Short hash matches.
 		sTableBits    = bestShortTableBits
 		maxSTableSize = bestShortTableSize

 		inputMargin = 8 + 2
 	)

 	// sLimit is when to stop looking for offset/length copies. The inputMargin
 	// lets us use a fast path for emitLiteral in the main loop, while we are
 	// looking for copies.
 	sLimit := len(src) - inputMargin
 	if len(src) < minNonLiteralBlockSize {
 		return 0
 	}

 	tbl := getBestTables()
 	lTable := &tbl.lTable
 	sTable := &tbl.sTable
 	defer bestTablePool.Put(tbl)

 	// Bail if we can't compress to at least this.
 	dstLimit := len(src) - 5

 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := 0

 	// The encoded form must start with a literal, as there are no previous
 	// bytes to copy, so we start looking for hash matches at s == 1.
 	s := 1
 	cv := load64(src, s)

 	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
 	repeat := 1
 	const lowbitMask = 0xffffffff
 	getCur := func(x uint64) int {
 		return int(x & lowbitMask)
 	}
 	getPrev := func(x uint64) int {
 		return int(x >> 32)
 	}
 	const maxSkip = 64

 	for {
 		type match struct {
 			offset int
 			s      int
 			length int
 			score  int
 		}
 		var best match
 		for {
 			// Next src position to check
 			nextS := (s-nextEmit)>>8 + 1
 			if nextS > maxSkip {
 				nextS = s + maxSkip
 			} else {
 				nextS += s
 			}
 			if nextS > sLimit {
 				goto emitRemainder
 			}
 			hashL := hash8(cv, lTableBits)
 			hashS := hash4(cv, sTableBits)
 			candidateL := lTable[hashL]
 			candidateS := sTable[hashS]

 			score := func(m match) int {
 				// Matches that are longer forward are penalized since we must emit it as a literal.
 				score := m.length - m.s
 				if nextEmit == m.s {
 					// If we do not have to emit literals, we save 1 byte
 					score++
 				}
 				offset := m.s - m.offset

 				return score - emitCopyNoRepeatSize(offset, m.length)
 			}

 			matchAt := func(offset, s int, first uint32) match {
 				if best.length != 0 && best.s-best.offset == s-offset {
 					// Don't retest if we have the same offset.
 					return match{offset: offset, s: s}
 				}
 				if load32(src, offset) != first {
 					return match{offset: offset, s: s}
 				}
 				m := match{offset: offset, s: s, length: 4 + offset}
 				s += 4
 				for s <= sLimit {
 					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
 						m.length += bits.TrailingZeros64(diff) >> 3
 						break
 					}
 					s += 8
 					m.length += 8
 				}
 				m.length -= offset
 				m.score = score(m)
 				if m.score <= -m.s {
 					// Eliminate if no savings, we might find a better one.
 					m.length = 0
 				}
 				return m
 			}

 			bestOf := func(a, b match) match {
 				if b.length == 0 {
 					return a
 				}
 				if a.length == 0 {
 					return b
 				}
 				as := a.score + b.s
 				bs := b.score + a.s
 				if as >= bs {
 					return a
 				}
 				return b
 			}

 			best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv)))
 			best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv)))
 			best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv)))

 			{
 				best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
 				if best.length > 0 {
 					// s+1
 					nextShort := sTable[hash4(cv>>8, sTableBits)]
 					s := s + 1
 					cv := load64(src, s)
 					nextLong := lTable[hash8(cv, lTableBits)]
 					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
 					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
 					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
 					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
 					// Repeat at + 2
 					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))

 					// s+2
 					if true {
 						nextShort = sTable[hash4(cv>>8, sTableBits)]
 						s++
 						cv = load64(src, s)
 						nextLong = lTable[hash8(cv, lTableBits)]
 						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
 						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
 						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
 						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
 					}
 					// Search for a match at best match end, see if that is better.
 					if sAt := best.s + best.length; sAt < sLimit {
 						sBack := best.s
 						backL := best.length
 						// Load initial values
 						cv = load64(src, sBack)
 						// Search for mismatch
 						next := lTable[hash8(load64(src, sAt), lTableBits)]
 						//next := sTable[hash4(load64(src, sAt), sTableBits)]

 						if checkAt := getCur(next) - backL; checkAt > 0 {
 							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
 						}
 						if checkAt := getPrev(next) - backL; checkAt > 0 {
 							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
 						}
 					}
 				}
 			}

 			// Update table
 			lTable[hashL] = uint64(s) | candidateL<<32
 			sTable[hashS] = uint64(s) | candidateS<<32

 			if best.length > 0 {
 				break
 			}

 			cv = load64(src, nextS)
 			s = nextS
 		}

 		// Extend backwards, not needed for repeats...
 		s = best.s
 		if true {
 			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
 				best.offset--
 				best.length++
 				s--
 			}
 		}
 		if false && best.offset >= s {
 			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
 		}
 		// Bail if we exceed the maximum size.
 		if d+(s-nextEmit) > dstLimit {
 			return 0
 		}

 		base := s
 		offset := s - best.offset

 		s += best.length

 		if offset > 65535 && s-base <= 5 {
 			// Bail if the match is equal or worse to the encoding.
 			s = best.s + 1
 			if s >= sLimit {
 				goto emitRemainder
 			}
 			cv = load64(src, s)
 			continue
 		}
 		d += emitLiteral(dst[d:], src[nextEmit:base])
 		d += emitCopyNoRepeat(dst[d:], offset, best.length)
 		repeat = offset

 		nextEmit = s
 		if s >= sLimit {
 			goto emitRemainder
 		}

 		if d > dstLimit {
 			// Do we have space for more, if not bail.
 			return 0
 		}
 		// Fill tables...
 		for i := best.s + 1; i < s; i++ {
 			cv0 := load64(src, i)
 			long0 := hash8(cv0, lTableBits)
 			short0 := hash4(cv0, sTableBits)
 			lTable[long0] = uint64(i) | lTable[long0]<<32
 			sTable[short0] = uint64(i) | sTable[short0]<<32
 		}
 		cv = load64(src, s)
 	}

 emitRemainder:
 	if nextEmit < len(src) {
 		// Bail if we exceed the maximum size.
 		if d+len(src)-nextEmit > dstLimit {
 			return 0
 		}
 		d += emitLiteral(dst[d:], src[nextEmit:])
 	}
 	return d
 }

 // emitCopySize returns the size to encode the offset+length
 //
 // It assumes that:
 //
 //	1 <= offset && offset <= math.MaxUint32
 //	4 <= length && length <= 1 << 24
 func emitCopySize(offset, length int) int {
 	if offset >= 65536 {
 		i := 0
 		if length > 64 {
 			length -= 64
 			if length >= 4 {
 				// Emit remaining as repeats
 				return 5 + emitRepeatSize(offset, length)
 			}
 			i = 5
 		}
 		if length == 0 {
 			return i
 		}
 		return i + 5
 	}

 	// Offset no more than 2 bytes.
 	if length > 64 {
 		if offset < 2048 {
 			// Emit 8 bytes, then rest as repeats...
 			return 2 + emitRepeatSize(offset, length-8)
 		}
 		// Emit remaining as repeats, at least 4 bytes remain.
 		return 3 + emitRepeatSize(offset, length-60)
 	}
 	if length >= 12 || offset >= 2048 {
 		return 3
 	}
 	// Emit the remaining copy, encoded as 2 bytes.
 	return 2
 }

 // emitCopyNoRepeatSize returns the size to encode the offset+length
 //
 // It assumes that:
 //
 //	1 <= offset && offset <= math.MaxUint32
 //	4 <= length && length <= 1 << 24
 func emitCopyNoRepeatSize(offset, length int) int {
 	if offset >= 65536 {
 		return 5 + 5*(length/64)
 	}

 	// Offset no more than 2 bytes.
 	if length > 64 {
 		// Emit remaining as repeats, at least 4 bytes remain.
 		return 3 + 3*(length/60)
 	}
 	if length >= 12 || offset >= 2048 {
 		return 3
 	}
 	// Emit the remaining copy, encoded as 2 bytes.
 	return 2
 }

 // emitRepeatSize returns the number of bytes required to encode a repeat.
 // Length must be at least 4 and < 1<<24
 func emitRepeatSize(offset, length int) int {
 	// Repeat offset, make length cheaper
 	if length <= 4+4 || (length < 8+4 && offset < 2048) {
 		return 2
 	}
 	if length < (1<<8)+4+4 {
 		return 3
 	}
 	if length < (1<<16)+(1<<8)+4 {
 		return 4
 	}
 	const maxRepeat = (1 << 24) - 1
 	length -= (1 << 16) - 4
 	left := 0
 	if length > maxRepeat {
 		left = length - maxRepeat + 4
 	}
 	if left > 0 {
 		return 5 + emitRepeatSize(offset, left)
 	}
 	return 5
 }
	// Copyright 2016 The Snappy-Go Authors. All rights reserved.
	// Copyright (c) 2019 Klaus Post. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package s2

	import (
	"fmt"
	"math"
	"math/bits"
	)

	// encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It
	// assumes that the varint-encoded length of the decompressed bytes has already
	// been written.
	//
	// It also assumes that:
	//
	// len(dst) >= MaxEncodedLen(len(src)) &&
	// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
	func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
	// Initialize the hash tables.
	const (
	// Long hash matches.
	lTableBits = bestLongTableBits
	maxLTableSize = bestLongTableSize

	// Short hash matches.
	sTableBits = bestShortTableBits
	maxSTableSize = bestShortTableSize

	inputMargin = 8 + 2

	debug = false
	)

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
	return 0
	}
	sLimitDict := min(len(src)-inputMargin, MaxDictSrcOffset-inputMargin)

	tbl := getBestTables()
	lTable := &tbl.lTable
	sTable := &tbl.sTable
	defer bestTablePool.Put(tbl)

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	repeat := 1
	if dict != nil {
	dict.initBest()
	s = 0
	repeat = len(dict.dict) - dict.repeat
	}
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	const lowbitMask = 0xffffffff
	getCur := func(x uint64) int {
	return int(x & lowbitMask)
	}
	getPrev := func(x uint64) int {
	return int(x >> 32)
	}
	const maxSkip = 64

	for {
	type match struct {
	offset int
	s int
	length int
	score int
	rep, dict bool
	}
	var best match
	for {
	// Next src position to check
	nextS := (s-nextEmit)>>8 + 1
	if nextS > maxSkip {
	nextS = s + maxSkip
	} else {
	nextS += s
	}
	if nextS > sLimit {
	goto emitRemainder
	}
	if dict != nil && s >= MaxDictSrcOffset {
	dict = nil
	if repeat > s {
	repeat = math.MinInt32
	}
	}
	hashL := hash8(cv, lTableBits)
	hashS := hash4(cv, sTableBits)
	candidateL := lTable[hashL]
	candidateS := sTable[hashS]

	score := func(m match) int {
	// Matches that are longer forward are penalized since we must emit it as a literal.
	score := m.length - m.s
	if nextEmit == m.s {
	// If we do not have to emit literals, we save 1 byte
	score++
	}
	offset := m.s - m.offset
	if m.rep {
	return score - emitRepeatSize(offset, m.length)
	}
	return score - emitCopySize(offset, m.length)
	}

	matchAt := func(offset, s int, first uint32, rep bool) match {
	if best.length != 0 && best.s-best.offset == s-offset {
	// Don't retest if we have the same offset.
	return match{offset: offset, s: s}
	}
	if load32(src, offset) != first {
	return match{offset: offset, s: s}
	}
	m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
	s += 4
	for s < len(src) {
	if len(src)-s < 8 {
	if src[s] == src[m.length] {
	m.length++
	s++
	continue
	}
	break
	}
	if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
	m.length += bits.TrailingZeros64(diff) >> 3
	break
	}
	s += 8
	m.length += 8
	}
	m.length -= offset
	m.score = score(m)
	if m.score <= -m.s {
	// Eliminate if no savings, we might find a better one.
	m.length = 0
	}
	return m
	}
	matchDict := func(candidate, s int, first uint32, rep bool) match {
	if s >= MaxDictSrcOffset {
	return match{offset: candidate, s: s}
	}
	// Calculate offset as if in continuous array with s
	offset := -len(dict.dict) + candidate
	if best.length != 0 && best.s-best.offset == s-offset && !rep {
	// Don't retest if we have the same offset.
	return match{offset: offset, s: s}
	}

	if load32(dict.dict, candidate) != first {
	return match{offset: offset, s: s}
	}
	m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true}
	s += 4
	if !rep {
	for s < sLimitDict && m.length < len(dict.dict) {
	if len(src)-s < 8 \|\| len(dict.dict)-m.length < 8 {
	if src[s] == dict.dict[m.length] {
	m.length++
	s++
	continue
	}
	break
	}
	if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
	m.length += bits.TrailingZeros64(diff) >> 3
	break
	}
	s += 8
	m.length += 8
	}
	} else {
	for s < len(src) && m.length < len(dict.dict) {
	if len(src)-s < 8 \|\| len(dict.dict)-m.length < 8 {
	if src[s] == dict.dict[m.length] {
	m.length++
	s++
	continue
	}
	break
	}
	if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
	m.length += bits.TrailingZeros64(diff) >> 3
	break
	}
	s += 8
	m.length += 8
	}
	}
	m.length -= candidate
	m.score = score(m)
	if m.score <= -m.s {
	// Eliminate if no savings, we might find a better one.
	m.length = 0
	}
	return m
	}

	bestOf := func(a, b match) match {
	if b.length == 0 {
	return a
	}
	if a.length == 0 {
	return b
	}
	as := a.score + b.s
	bs := b.score + a.s
	if as >= bs {
	return a
	}
	return b
	}

	if s > 0 {
	best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
	best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
	best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))
	}
	if dict != nil {
	candidateL := dict.bestTableLong[hashL]
	candidateS := dict.bestTableShort[hashS]
	best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
	best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false))
	best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
	best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false))
	}
	{
	if (dict == nil \|\| repeat <= s) && repeat > 0 {
	best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
	} else if s-repeat < -4 && dict != nil {
	candidate := len(dict.dict) - (repeat - s)
	best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
	candidate++
	best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true))
	}

	if best.length > 0 {
	hashS := hash4(cv>>8, sTableBits)
	// s+1
	nextShort := sTable[hashS]
	s := s + 1
	cv := load64(src, s)
	hashL := hash8(cv, lTableBits)
	nextLong := lTable[hashL]
	best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
	best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
	best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
	best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))

	// Dict at + 1
	if dict != nil {
	candidateL := dict.bestTableLong[hashL]
	candidateS := dict.bestTableShort[hashS]

	best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
	best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
	}

	// s+2
	if true {
	hashS := hash4(cv>>8, sTableBits)

	nextShort = sTable[hashS]
	s++
	cv = load64(src, s)
	hashL := hash8(cv, lTableBits)
	nextLong = lTable[hashL]

	if (dict == nil \|\| repeat <= s) && repeat > 0 {
	// Repeat at + 2
	best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true))
	} else if repeat-s > 4 && dict != nil {
	candidate := len(dict.dict) - (repeat - s)
	best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
	}
	best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
	best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
	best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
	best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))

	// Dict at +2
	// Very small gain
	if dict != nil {
	candidateL := dict.bestTableLong[hashL]
	candidateS := dict.bestTableShort[hashS]

	best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
	best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
	}
	}
	// Search for a match at best match end, see if that is better.
	// Allow some bytes at the beginning to mismatch.
	// Sweet spot is around 1-2 bytes, but depends on input.
	// The skipped bytes are tested in Extend backwards,
	// and still picked up as part of the match if they do.
	const skipBeginning = 2
	const skipEnd = 1
	if sAt := best.s + best.length - skipEnd; sAt < sLimit {

	sBack := best.s + skipBeginning - skipEnd
	backL := best.length - skipBeginning
	// Load initial values
	cv = load64(src, sBack)

	// Grab candidates...
	next := lTable[hash8(load64(src, sAt), lTableBits)]

	if checkAt := getCur(next) - backL; checkAt > 0 {
	best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
	}
	if checkAt := getPrev(next) - backL; checkAt > 0 {
	best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
	}
	// Disabled: Extremely small gain
	if false {
	next = sTable[hash4(load64(src, sAt), sTableBits)]
	if checkAt := getCur(next) - backL; checkAt > 0 {
	best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
	}
	if checkAt := getPrev(next) - backL; checkAt > 0 {
	best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
	}
	}
	}
	}
	}

	// Update table
	lTable[hashL] = uint64(s) \| candidateL<<32
	sTable[hashS] = uint64(s) \| candidateS<<32

	if best.length > 0 {
	break
	}

	cv = load64(src, nextS)
	s = nextS
	}

	// Extend backwards, not needed for repeats...
	s = best.s
	if !best.rep && !best.dict {
	for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
	best.offset--
	best.length++
	s--
	}
	}
	if false && best.offset >= s {
	panic(fmt.Errorf("t %d >= s %d", best.offset, s))
	}
	// Bail if we exceed the maximum size.
	if d+(s-nextEmit) > dstLimit {
	return 0
	}

	base := s
	offset := s - best.offset
	s += best.length

	if offset > 65535 && s-base <= 5 && !best.rep {
	// Bail if the match is equal or worse to the encoding.
	s = best.s + 1
	if s >= sLimit {
	goto emitRemainder
	}
	cv = load64(src, s)
	continue
	}
	if debug && nextEmit != base {
	fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base)
	}
	d += emitLiteral(dst[d:], src[nextEmit:base])
	if best.rep {
	if nextEmit > 0 \|\| best.dict {
	if debug {
	fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
	}
	// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
	d += emitRepeat(dst[d:], offset, best.length)
	} else {
	// First match without dict cannot be a repeat.
	if debug {
	fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
	}
	d += emitCopy(dst[d:], offset, best.length)
	}
	} else {
	if debug {
	fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
	}
	d += emitCopy(dst[d:], offset, best.length)
	}
	repeat = offset

	nextEmit = s
	if s >= sLimit {
	goto emitRemainder
	}

	if d > dstLimit {
	// Do we have space for more, if not bail.
	return 0
	}
	// Fill tables...
	for i := best.s + 1; i < s; i++ {
	cv0 := load64(src, i)
	long0 := hash8(cv0, lTableBits)
	short0 := hash4(cv0, sTableBits)
	lTable[long0] = uint64(i) \| lTable[long0]<<32
	sTable[short0] = uint64(i) \| sTable[short0]<<32
	}
	cv = load64(src, s)
	}

	emitRemainder:
	if nextEmit < len(src) {
	// Bail if we exceed the maximum size.
	if d+len(src)-nextEmit > dstLimit {
	return 0
	}
	if debug && nextEmit != s {
	fmt.Println("emitted ", len(src)-nextEmit, "literals")
	}
	d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
	}

	// encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
	// assumes that the varint-encoded length of the decompressed bytes has already
	// been written.
	//
	// It also assumes that:
	//
	// len(dst) >= MaxEncodedLen(len(src)) &&
	// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
	func encodeBlockBestSnappy(dst, src []byte) (d int) {
	// Initialize the hash tables.
	const (
	// Long hash matches.
	lTableBits = bestLongTableBits
	maxLTableSize = bestLongTableSize

	// Short hash matches.
	sTableBits = bestShortTableBits
	maxSTableSize = bestShortTableSize

	inputMargin = 8 + 2
	)

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
	return 0
	}

	tbl := getBestTables()
	lTable := &tbl.lTable
	sTable := &tbl.sTable
	defer bestTablePool.Put(tbl)

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1
	const lowbitMask = 0xffffffff
	getCur := func(x uint64) int {
	return int(x & lowbitMask)
	}
	getPrev := func(x uint64) int {
	return int(x >> 32)
	}
	const maxSkip = 64

	for {
	type match struct {
	offset int
	s int
	length int
	score int
	}
	var best match
	for {
	// Next src position to check
	nextS := (s-nextEmit)>>8 + 1
	if nextS > maxSkip {
	nextS = s + maxSkip
	} else {
	nextS += s
	}
	if nextS > sLimit {
	goto emitRemainder
	}
	hashL := hash8(cv, lTableBits)
	hashS := hash4(cv, sTableBits)
	candidateL := lTable[hashL]
	candidateS := sTable[hashS]

	score := func(m match) int {
	// Matches that are longer forward are penalized since we must emit it as a literal.
	score := m.length - m.s
	if nextEmit == m.s {
	// If we do not have to emit literals, we save 1 byte
	score++
	}
	offset := m.s - m.offset

	return score - emitCopyNoRepeatSize(offset, m.length)
	}

	matchAt := func(offset, s int, first uint32) match {
	if best.length != 0 && best.s-best.offset == s-offset {
	// Don't retest if we have the same offset.
	return match{offset: offset, s: s}
	}
	if load32(src, offset) != first {
	return match{offset: offset, s: s}
	}
	m := match{offset: offset, s: s, length: 4 + offset}
	s += 4
	for s <= sLimit {
	if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
	m.length += bits.TrailingZeros64(diff) >> 3
	break
	}
	s += 8
	m.length += 8
	}
	m.length -= offset
	m.score = score(m)
	if m.score <= -m.s {
	// Eliminate if no savings, we might find a better one.
	m.length = 0
	}
	return m
	}

	bestOf := func(a, b match) match {
	if b.length == 0 {
	return a
	}
	if a.length == 0 {
	return b
	}
	as := a.score + b.s
	bs := b.score + a.s
	if as >= bs {
	return a
	}
	return b
	}

	best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv)))
	best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv)))
	best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv)))

	{
	best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
	if best.length > 0 {
	// s+1
	nextShort := sTable[hash4(cv>>8, sTableBits)]
	s := s + 1
	cv := load64(src, s)
	nextLong := lTable[hash8(cv, lTableBits)]
	best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
	best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
	best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
	best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
	// Repeat at + 2
	best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))

	// s+2
	if true {
	nextShort = sTable[hash4(cv>>8, sTableBits)]
	s++
	cv = load64(src, s)
	nextLong = lTable[hash8(cv, lTableBits)]
	best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
	best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
	best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
	best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
	}
	// Search for a match at best match end, see if that is better.
	if sAt := best.s + best.length; sAt < sLimit {
	sBack := best.s
	backL := best.length
	// Load initial values
	cv = load64(src, sBack)
	// Search for mismatch
	next := lTable[hash8(load64(src, sAt), lTableBits)]
	//next := sTable[hash4(load64(src, sAt), sTableBits)]

	if checkAt := getCur(next) - backL; checkAt > 0 {
	best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
	}
	if checkAt := getPrev(next) - backL; checkAt > 0 {
	best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
	}
	}
	}
	}

	// Update table
	lTable[hashL] = uint64(s) \| candidateL<<32
	sTable[hashS] = uint64(s) \| candidateS<<32

	if best.length > 0 {
	break
	}

	cv = load64(src, nextS)
	s = nextS
	}

	// Extend backwards, not needed for repeats...
	s = best.s
	if true {
	for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
	best.offset--
	best.length++
	s--
	}
	}
	if false && best.offset >= s {
	panic(fmt.Errorf("t %d >= s %d", best.offset, s))
	}
	// Bail if we exceed the maximum size.
	if d+(s-nextEmit) > dstLimit {
	return 0
	}

	base := s
	offset := s - best.offset

	s += best.length

	if offset > 65535 && s-base <= 5 {
	// Bail if the match is equal or worse to the encoding.
	s = best.s + 1
	if s >= sLimit {
	goto emitRemainder
	}
	cv = load64(src, s)
	continue
	}
	d += emitLiteral(dst[d:], src[nextEmit:base])
	d += emitCopyNoRepeat(dst[d:], offset, best.length)
	repeat = offset

	nextEmit = s
	if s >= sLimit {
	goto emitRemainder
	}

	if d > dstLimit {
	// Do we have space for more, if not bail.
	return 0
	}
	// Fill tables...
	for i := best.s + 1; i < s; i++ {
	cv0 := load64(src, i)
	long0 := hash8(cv0, lTableBits)
	short0 := hash4(cv0, sTableBits)
	lTable[long0] = uint64(i) \| lTable[long0]<<32
	sTable[short0] = uint64(i) \| sTable[short0]<<32
	}
	cv = load64(src, s)
	}

	emitRemainder:
	if nextEmit < len(src) {
	// Bail if we exceed the maximum size.
	if d+len(src)-nextEmit > dstLimit {
	return 0
	}
	d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
	}

	// emitCopySize returns the size to encode the offset+length
	//
	// It assumes that:
	//
	// 1 <= offset && offset <= math.MaxUint32
	// 4 <= length && length <= 1 << 24
	func emitCopySize(offset, length int) int {
	if offset >= 65536 {
	i := 0
	if length > 64 {
	length -= 64
	if length >= 4 {
	// Emit remaining as repeats
	return 5 + emitRepeatSize(offset, length)
	}
	i = 5
	}
	if length == 0 {
	return i
	}
	return i + 5
	}

	// Offset no more than 2 bytes.
	if length > 64 {
	if offset < 2048 {
	// Emit 8 bytes, then rest as repeats...
	return 2 + emitRepeatSize(offset, length-8)
	}
	// Emit remaining as repeats, at least 4 bytes remain.
	return 3 + emitRepeatSize(offset, length-60)
	}
	if length >= 12 \|\| offset >= 2048 {
	return 3
	}
	// Emit the remaining copy, encoded as 2 bytes.
	return 2
	}

	// emitCopyNoRepeatSize returns the size to encode the offset+length
	//
	// It assumes that:
	//
	// 1 <= offset && offset <= math.MaxUint32
	// 4 <= length && length <= 1 << 24
	func emitCopyNoRepeatSize(offset, length int) int {
	if offset >= 65536 {
	return 5 + 5*(length/64)
	}

	// Offset no more than 2 bytes.
	if length > 64 {
	// Emit remaining as repeats, at least 4 bytes remain.
	return 3 + 3*(length/60)
	}
	if length >= 12 \|\| offset >= 2048 {
	return 3
	}
	// Emit the remaining copy, encoded as 2 bytes.
	return 2
	}

	// emitRepeatSize returns the number of bytes required to encode a repeat.
	// Length must be at least 4 and < 1<<24
	func emitRepeatSize(offset, length int) int {
	// Repeat offset, make length cheaper
	if length <= 4+4 \|\| (length < 8+4 && offset < 2048) {
	return 2
	}
	if length < (1<<8)+4+4 {
	return 3
	}
	if length < (1<<16)+(1<<8)+4 {
	return 4
	}
	const maxRepeat = (1 << 24) - 1
	length -= (1 << 16) - 4
	left := 0
	if length > maxRepeat {
	left = length - maxRepeat + 4
	}
	if left > 0 {
	return 5 + emitRepeatSize(offset, left)
	}
	return 5
	}