improved speed of the Sequences converter
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 7cfeda2..2c4c4b0 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1084,7 +1084,7 @@
*
* Note: This field is optional. ZSTD_generateSequences() will calculate the value of
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
- * sequence provider's perspective. For example, ZSTD_compressSequences() does not
+ * sequence provider perspective. For example, ZSTD_compressSequences() does not
* use this 'rep' field at all (as of now).
*/
} ZSTD_Sequence;
@@ -1331,7 +1331,7 @@
<pre><b>ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()")
ZSTDLIB_STATIC_API size_t
ZSTD_generateSequences(ZSTD_CCtx* zc,
- ZSTD_Sequence* outSeqs, size_t outSeqsSize,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
const void* src, size_t srcSize);
</b><p> WARNING: This function is meant for debugging and informational purposes ONLY!
Its implementation is flawed, and it will be deleted in a future version.
@@ -1345,7 +1345,7 @@
@param zc The compression context to be used for ZSTD_compress2(). Set any
compression parameters you need on this context.
@param outSeqs The output sequences buffer of size @p outSeqsSize
- @param outSeqsSize The size of the output sequences buffer.
+ @param outSeqsCapacity The size of the output sequences buffer.
ZSTD_sequenceBound(srcSize) is an upper bound on the number
of sequences that can be generated.
@param src The source buffer to generate sequences from of size @p srcSize.
@@ -1392,11 +1392,17 @@
the block size derived from the cctx, and sequences may be split. This is the default setting.
If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
- block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
+ valid block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
- If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
- behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
- specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
+ When ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, it's possible to choose whether to generate repcodes
+ using the advanced parameter ZSTD_c_repcodeResolution. Repcodes will improve compression ratio, though the benefit
+ can vary greatly depending on Sequences. On the other hand, repcode resolution is an expensive operation.
+ By default, it's disabled at low (<10) compression levels, and enabled above the threshold (>=10).
+ ZSTD_c_repcodeResolution makes it possible to directly manage this processing in either direction.
+
+ If ZSTD_c_validateSequences == 0, this function blindly accepts the Sequences provided. Invalid Sequences cause undefined
+ behavior. If ZSTD_c_validateSequences == 1, then the function will detect invalid Sequences (see doc/zstd_compression_format.md for
+ specifics regarding offset/matchlength requirements) and then bail out and return an error.
In addition to the two adjustable experimental params, there are other important cctx params.
- ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
@@ -1414,19 +1420,21 @@
<pre><b>ZSTDLIB_STATIC_API size_t
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
- const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
- const void* literals, size_t litSize, size_t srcSize);
+ const ZSTD_Sequence* inSeqs, size_t nbSequences,
+ const void* literals, size_t litSize);
</b><p> This is a variant of ZSTD_compressSequences() which,
instead of receiving (src,srcSize) as input parameter, receives (literals,litSize),
- aka all literals already extracted and laid out into a single continuous buffer.
+ aka all the literals, already extracted and laid out into a single continuous buffer.
This can be useful if the process generating the sequences also happens to generate the buffer of literals,
thus skipping an extraction + caching stage.
- It's essentially a speed optimization when the right conditions are met,
- but it also is restricted by the following limitations:
+ It's a speed optimization, useful when the right conditions are met,
+ but it also features the following limitations:
- Only supports explicit delimiter mode
- Not compatible with frame checksum, which must disabled
- - Can fail when unable to compress sufficiently
- Also, to be valid, @litSize must be equal to the sum of all @.litLength fields in @inSeqs.
+ - Does not write the content size in frame header
+ - If any block is incompressible, will fail and return an error
+ - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error.
+ - the buffer @literals must be larger than @litSize by at least 8 bytes.
@return : final compressed size, or a ZSTD error code.
</p></pre><BR>
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index c4f699e..70a9731 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2207,7 +2207,7 @@
zc->appliedParams.fParams.contentSizeFlag = 0;
DEBUGLOG(4, "pledged content size : %u ; flag : %u",
(unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
- zc->blockSize = blockSize;
+ zc->blockSizeMax = blockSize;
XXH64_reset(&zc->xxhState, 0);
zc->stage = ZSTDcs_init;
@@ -4293,8 +4293,8 @@
lastBlock, 0 /* isPartition */);
FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
- assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX);
- assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize);
+ assert(zc->blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
+ assert(cSizeSingleBlock <= zc->blockSizeMax + ZSTD_blockHeaderSize);
return cSizeSingleBlock;
}
@@ -4328,7 +4328,7 @@
dstCapacity -= cSizeChunk;
cSize += cSizeChunk;
*currSeqStore = *nextSeqStore;
- assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize);
+ assert(cSizeChunk <= zc->blockSizeMax + ZSTD_blockHeaderSize);
}
/* cRep and dRep may have diverged during the compression.
* If so, we use the dRep repcodes for the next block.
@@ -4580,7 +4580,7 @@
const void* src, size_t srcSize,
U32 lastFrameChunk)
{
- size_t blockSizeMax = cctx->blockSize;
+ size_t blockSizeMax = cctx->blockSizeMax;
size_t remaining = srcSize;
const BYTE* ip = (const BYTE*)src;
BYTE* const ostart = (BYTE*)dst;
@@ -4816,7 +4816,7 @@
src, (BYTE const*)src + srcSize);
}
- DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
+ DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSizeMax);
{ size_t const cSize = frame ?
ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
@@ -6070,11 +6070,11 @@
static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
{
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
- return cctx->blockSize - cctx->stableIn_notConsumed;
+ return cctx->blockSizeMax - cctx->stableIn_notConsumed;
}
assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered);
{ size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
- if (hintInSize==0) hintInSize = cctx->blockSize;
+ if (hintInSize==0) hintInSize = cctx->blockSizeMax;
return hintInSize;
}
}
@@ -6162,7 +6162,7 @@
} else {
assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable);
if ( (flushMode == ZSTD_e_continue)
- && ( (size_t)(iend - ip) < zcs->blockSize) ) {
+ && ( (size_t)(iend - ip) < zcs->blockSizeMax) ) {
/* can't compress a full block : stop here */
zcs->stableIn_notConsumed = (size_t)(iend - ip);
ip = iend; /* pretend to have consumed input */
@@ -6181,7 +6181,7 @@
size_t cSize;
size_t oSize = (size_t)(oend-op);
size_t const iSize = inputBuffered ? zcs->inBuffPos - zcs->inToCompress
- : MIN((size_t)(iend - ip), zcs->blockSize);
+ : MIN((size_t)(iend - ip), zcs->blockSizeMax);
if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
cDst = op; /* compress into output buffer, to skip flush stage */
else
@@ -6196,9 +6196,9 @@
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
zcs->frameEnded = lastBlock;
/* prepare next block */
- zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
+ zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSizeMax;
if (zcs->inBuffTarget > zcs->inBuffSize)
- zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
+ zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSizeMax;
DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
(unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
if (!lastBlock)
@@ -6413,7 +6413,7 @@
/* for small input: avoid automatic flush on reaching end of block, since
* it would require to add a 3-bytes null block to end frame
*/
- cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
+ cctx->inBuffTarget = cctx->blockSizeMax + (cctx->blockSizeMax == pledgedSrcSize);
} else {
cctx->inBuffTarget = 0;
}
@@ -6951,7 +6951,7 @@
size_t compressedSeqsSize;
size_t cBlockSize;
size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
- cctx->blockSize, remaining,
+ cctx->blockSizeMax, remaining,
inSeqs, inSeqsSize, seqPos);
U32 const lastBlock = (blockSize == remaining);
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
@@ -7093,167 +7093,112 @@
}
/*
- * seqPos must end on an explicit block delimiter
- * @blockSize must be exactly correct.
+ * Note: Sequence validation functionality has been disabled (removed).
+ * Removing it restores simplicity, which in turn improves performance.
+ * It may be re-inserted later.
*/
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_convertBlockSequences_wBlockDelim_internal(ZSTD_CCtx* cctx,
- ZSTD_SequencePosition* seqPos,
+size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx,
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
- size_t blockSize,
- int const repcodeResolution,
- int const checkSequences)
+ int const repcodeResolution)
{
- U32 idx = seqPos->idx;
- U32 const startIdx = idx;
Repcodes_t updatedRepcodes;
- U32 dictSize = 0;
- size_t startPosInSrc = seqPos->posInSrc;
size_t litConsumed = 0;
+ size_t seqNb = 0;
- DEBUGLOG(5, "ZSTD_transferSequencesOnly_wBlockDelim (blockSize = %zu)", blockSize);
+ DEBUGLOG(5, "ZSTD_convertBlockSequences (nbSequences = %zu)", nbSequences);
- /* dictSize is useful to check offset within Sequence validation */
- if (checkSequences) {
- if (cctx->cdict) {
- dictSize = (U32)cctx->cdict->dictContentSize;
- } else if (cctx->prefixDict.dict) {
- dictSize = (U32)cctx->prefixDict.dictSize;
- }
- }
+ RETURN_ERROR_IF(nbSequences >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
+ "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(Repcodes_t));
+ /* check end condition */
+ assert(nbSequences >= 1);
+ assert(inSeqs[nbSequences-1].matchLength == 0);
+ assert(inSeqs[nbSequences-1].offset == 0);
+
/* Convert Sequences from public format to internal format */
- for (; idx < nbSequences && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
- U32 const litLength = inSeqs[idx].litLength;
- U32 const matchLength = inSeqs[idx].matchLength;
+ for (seqNb = 0; seqNb < nbSequences - 1 ; seqNb++) {
+ U32 const litLength = inSeqs[seqNb].litLength;
+ U32 const matchLength = inSeqs[seqNb].matchLength;
U32 offBase;
if (!repcodeResolution) {
- offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset);
+ offBase = OFFSET_TO_OFFBASE(inSeqs[seqNb].offset);
} else {
U32 const ll0 = (litLength == 0);
- offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
+ offBase = ZSTD_finalizeOffBase(inSeqs[seqNb].offset, updatedRepcodes.rep, ll0);
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
}
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
- if (checkSequences) {
- seqPos->posInSrc += litLength + matchLength;
- FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch,
- seqPos->posInSrc,
- cctx->appliedParams.cParams.windowLog, dictSize,
- ZSTD_hasExtSeqProd(&cctx->appliedParams)),
- "Sequence validation failed");
- }
- RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
- "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength);
litConsumed += litLength;
}
/* last sequence (only literals) */
- { size_t const lastLitLength = inSeqs[idx].litLength;
- litConsumed += lastLitLength;
- if (checkSequences) {
- seqPos->posInSrc += lastLitLength;
- /* blockSize must be exactly correct (checked before calling this function) */
- RETURN_ERROR_IF((seqPos->posInSrc - startPosInSrc) != blockSize, externalSequences_invalid, "mismatch between Sequences and specified blockSize");
- } else {
- /* blockSize presumed exactly correct (checked before calling this function) */
- seqPos->posInSrc += blockSize;
- }
- }
+ litConsumed += inSeqs[nbSequences-1].litLength;
/* If we skipped repcode search while parsing, we need to update repcodes now */
- assert(idx >= startIdx);
- if (!repcodeResolution && idx != startIdx) {
+ if (!repcodeResolution && nbSequences > 1) {
U32* const rep = updatedRepcodes.rep;
- U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */
- if (lastSeqIdx >= startIdx + 2) {
+ if (nbSequences >= 4) {
+ U32 lastSeqIdx = (U32)nbSequences - 2; /* index of last full sequence */
rep[2] = inSeqs[lastSeqIdx - 2].offset;
rep[1] = inSeqs[lastSeqIdx - 1].offset;
rep[0] = inSeqs[lastSeqIdx].offset;
- } else if (lastSeqIdx == startIdx + 1) {
+ } else if (nbSequences == 3) {
rep[2] = rep[0];
- rep[1] = inSeqs[lastSeqIdx - 1].offset;
- rep[0] = inSeqs[lastSeqIdx].offset;
+ rep[1] = inSeqs[0].offset;
+ rep[0] = inSeqs[1].offset;
} else {
- assert(lastSeqIdx == startIdx);
+ assert(nbSequences == 2);
rep[2] = rep[1];
rep[1] = rep[0];
- rep[0] = inSeqs[lastSeqIdx].offset;
+ rep[0] = inSeqs[0].offset;
}
}
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t));
- seqPos->idx = idx+1;
return litConsumed;
}
-/* for tests only */
-void CCTX_resetSeqStore(ZSTD_CCtx* cctx)
-{
- cctx->seqStore.sequences = cctx->seqStore.sequencesStart;
- cctx->seqStore.lit = cctx->seqStore.litStart;
-}
-
typedef size_t (*ZSTD_convertBlockSequences_f) (ZSTD_CCtx* cctx,
- ZSTD_SequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
- size_t blockSize,
int const repcodeResolution);
-size_t
-ZSTD_convertBlockSequences_wBlockDelim(ZSTD_CCtx* cctx,
- ZSTD_SequencePosition* seqPos,
- const ZSTD_Sequence* const inSeqs, size_t nbSequences,
- size_t blockSize,
- int const repcodeResolution)
+static size_t getNbSequencesFor1Block(const ZSTD_Sequence* seqs, size_t nbSeqs)
{
- return ZSTD_convertBlockSequences_wBlockDelim_internal(cctx,
- seqPos, inSeqs, nbSequences, blockSize,
- repcodeResolution, 0);
+ size_t n;
+ assert(seqs);
+ for (n=0; n<nbSeqs; n++) {
+ if (seqs[n].matchLength == 0) {
+ assert(seqs[n].offset == 0);
+ return n+1;
+ }
+ }
+ RETURN_ERROR(externalSequences_invalid, "missing final block delimiter");
}
-static size_t
-ZSTD_convertBlockSequences_wBlockDelim_andCheckSequences(ZSTD_CCtx* cctx,
- ZSTD_SequencePosition* seqPos,
- const ZSTD_Sequence* const inSeqs, size_t nbSequences,
- size_t blockSize,
- int const repcodeResolution)
-{
- return ZSTD_convertBlockSequences_wBlockDelim_internal(cctx,
- seqPos, inSeqs, nbSequences, blockSize,
- repcodeResolution, 1);
-}
static size_t
ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t nbSequences,
- const void* literals, size_t litSize, size_t srcSize)
+ const void* literals, size_t litSize)
{
size_t cSize = 0;
- size_t remaining = srcSize;
- ZSTD_SequencePosition seqPos = {0, 0, 0};
BYTE* op = (BYTE*)dst;
int const repcodeResolution = (cctx->appliedParams.searchForExternalRepcodes == ZSTD_ps_enable);
- const ZSTD_convertBlockSequences_f convertBlockSequences = cctx->appliedParams.validateSequences ?
- ZSTD_convertBlockSequences_wBlockDelim_andCheckSequences
- : ZSTD_convertBlockSequences_wBlockDelim;
-
- DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals_internal: nbSeqs=%zu, litSize=%zu", nbSequences, litSize);
- if (cctx->appliedParams.blockDelimiters == ZSTD_sf_noBlockDelimiters) {
- RETURN_ERROR(GENERIC, "This mode is only compatible with explicit delimiters");
- }
assert(cctx->appliedParams.searchForExternalRepcodes != ZSTD_ps_auto);
+ DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals_internal: nbSeqs=%zu, litSize=%zu", nbSequences, litSize);
+ RETURN_ERROR_IF(nbSequences == 0, externalSequences_invalid, "Requires at least 1 end-of-block");
+
/* Special case: empty frame */
- if (remaining == 0) {
+ if ((nbSequences == 1) && (inSeqs[0].litLength == 0)) {
U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
MEM_writeLE32(op, cBlockHeader24);
@@ -7262,23 +7207,21 @@
cSize += ZSTD_blockHeaderSize;
}
- while (remaining) {
+ while (nbSequences) {
size_t compressedSeqsSize, cBlockSize, litConsumed;
- size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
- cctx->blockSize, remaining,
- inSeqs, nbSequences, seqPos);
- U32 const lastBlock = (blockSize == remaining);
- FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
- assert(blockSize <= remaining);
+ size_t nbBlockSeqs = getNbSequencesFor1Block(inSeqs, nbSequences);
+ U32 const lastBlock = (nbBlockSeqs == nbSequences);
+ FORWARD_IF_ERROR(nbBlockSeqs, "Error while trying to determine nb of sequences for a block");
+ assert(nbBlockSeqs <= nbSequences);
ZSTD_resetSeqStore(&cctx->seqStore);
- litConsumed = convertBlockSequences(cctx,
- &seqPos,
- inSeqs, nbSequences,
- blockSize,
+ litConsumed = ZSTD_convertBlockSequences(cctx,
+ inSeqs, nbBlockSeqs,
repcodeResolution);
FORWARD_IF_ERROR(litConsumed, "Bad sequence conversion");
RETURN_ERROR_IF(litConsumed > litSize, externalSequences_invalid, "discrepancy between literals buffer and Sequences");
+ inSeqs += nbBlockSeqs;
+ nbSequences -= nbBlockSeqs;
/* Note: when blockSize is very small, other variant send it uncompressed.
* Here, we still send the sequences, because we don't have the original source to send it uncompressed.
@@ -7286,16 +7229,18 @@
* but that's complex and costly memory intensive, which goes against the objectives of this variant. */
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
- compressedSeqsSize = ZSTD_entropyCompressSeqStore_wExtLitBuffer(
+
+ compressedSeqsSize = ZSTD_entropyCompressSeqStore_internal(
op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
literals, litConsumed,
- blockSize,
&cctx->seqStore,
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
&cctx->appliedParams,
cctx->tmpWorkspace, cctx->tmpWkspSize /* statically allocated in resetCCtx */,
cctx->bmi2);
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
+ /* note: the spec forbids any compressed block from being larger than the maximum block size */
+ if (compressedSeqsSize > cctx->blockSizeMax) compressedSeqsSize = 0;
DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
litSize -= litConsumed;
literals = (const char*)literals + litConsumed;
@@ -7331,7 +7276,6 @@
break;
} else {
op += cBlockSize;
- remaining -= blockSize;
dstCapacity -= cBlockSize;
cctx->isFirstBlock = 0;
}
@@ -7347,7 +7291,7 @@
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
- const void* literals, size_t litSize, size_t srcSize)
+ const void* literals, size_t litSize)
{
BYTE* op = (BYTE*)dst;
size_t cSize = 0;
@@ -7355,15 +7299,18 @@
/* Transparent initialization stage, same as compressStream2() */
DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals (dstCapacity=%zu)", dstCapacity);
assert(cctx != NULL);
- FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
+ FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_continue, ZSTD_CONTENTSIZE_UNKNOWN), "CCtx initialization failed");
+ if (cctx->appliedParams.blockDelimiters == ZSTD_sf_noBlockDelimiters) {
+ RETURN_ERROR(frameParameter_unsupported, "This mode is only compatible with explicit delimiters");
+ }
if (cctx->appliedParams.fParams.checksumFlag) {
- RETURN_ERROR(frameParameter_unsupported, "this mode is incompatible with frame checksum");
+ RETURN_ERROR(frameParameter_unsupported, "this mode is not compatible with frame checksum");
}
/* Begin writing output, starting with frame header */
{ size_t const frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity,
- &cctx->appliedParams, srcSize, cctx->dictID);
+ &cctx->appliedParams, ZSTD_CONTENTSIZE_UNKNOWN, cctx->dictID);
op += frameHeaderSize;
assert(frameHeaderSize <= dstCapacity);
dstCapacity -= frameHeaderSize;
@@ -7374,7 +7321,7 @@
{ size_t const cBlocksSize = ZSTD_compressSequencesAndLiterals_internal(cctx,
op, dstCapacity,
inSeqs, inSeqsSize,
- literals, litSize, srcSize);
+ literals, litSize);
FORWARD_IF_ERROR(cBlocksSize, "Compressing blocks failed!");
cSize += cBlocksSize;
assert(cBlocksSize <= dstCapacity);
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index bbcf4a7..2d4ecc8 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -484,7 +484,7 @@
size_t dictContentSize;
ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
- size_t blockSize;
+ size_t blockSizeMax;
unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
unsigned long long consumedSrcSize;
unsigned long long producedCSize;
@@ -1528,15 +1528,11 @@
size_t posInSrc; /* Number of bytes given by sequences provided so far */
} ZSTD_SequencePosition;
-size_t
-ZSTD_convertBlockSequences_wBlockDelim(ZSTD_CCtx* cctx,
- ZSTD_SequencePosition* seqPos,
+/* for benchmark */
+size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx,
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
- size_t blockSize,
int const repcodeResolution);
-/* for tests only */
-void CCTX_resetSeqStore(ZSTD_CCtx* cctx);
/* ==============================================================
* Private declarations
diff --git a/lib/zstd.h b/lib/zstd.h
index 68e78b3..3fb0261 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1675,6 +1675,7 @@
* but it also features the following limitations:
* - Only supports explicit delimiter mode
* - Not compatible with frame checksum, which must disabled
+ * - Does not write the content size in frame header
* - If any block is incompressible, will fail and return an error
* - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error.
* - the buffer @literals must be larger than @litSize by at least 8 bytes.
@@ -1684,7 +1685,7 @@
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t nbSequences,
- const void* literals, size_t litSize, size_t srcSize);
+ const void* literals, size_t litSize);
/*! ZSTD_writeSkippableFrame() :
diff --git a/tests/Makefile b/tests/Makefile
index f24d5cb..982181d 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -26,6 +26,9 @@
DEBUGLEVEL ?= 2
export DEBUGLEVEL # transmit value to sub-makefiles
+.PHONY: default
+default: fullbench
+
LIBZSTD_MK_DIR := ../lib
include $(LIBZSTD_MK_DIR)/libzstd.mk
@@ -78,9 +81,6 @@
ZSTDRTTEST = --test-large-data
DECODECORPUS_TESTTIME ?= -T30
-.PHONY: default
-default: fullbench
-
.PHONY: all
all: fullbench fuzzer zstreamtest paramgrill datagen decodecorpus roundTripCrash poolTests
diff --git a/tests/fullbench.c b/tests/fullbench.c
index c42c923..3f5044c 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -623,9 +623,9 @@
ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_repcodeResolution, ZSTD_ps_enable);
#endif
assert(12 + nbSeqs * sizeof(ZSTD_Sequence) + nbLiterals == inputSize); (void)inputSize;
- (void)payload;
+ (void)payload; (void)srcSize;
- return ZSTD_compressSequencesAndLiterals(g_zcc, dst, dstCapacity, seqs, nbSeqs, literals, nbLiterals, srcSize);
+ return ZSTD_compressSequencesAndLiterals(g_zcc, dst, dstCapacity, seqs, nbSeqs, literals, nbLiterals);
}
static PrepResult prepConvertSequences(const void* src, size_t srcSize, int cLevel)
@@ -669,22 +669,21 @@
void* dst, size_t dstCapacity,
void* payload)
{
- ZSTD_SequencePosition seqPos = { 0, 0 , 0 };
const char* ip = input;
size_t const blockSize = MEM_read32(ip);
size_t const nbSeqs = MEM_read32(ip+=4);
const ZSTD_Sequence* seqs = (const ZSTD_Sequence*)(const void*)(ip+=4);
ZSTD_CCtx_reset(g_zcc, ZSTD_reset_session_and_parameters);
- CCTX_resetSeqStore(g_zcc);
+ ZSTD_resetSeqStore(&g_zcc->seqStore);
ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
# if 0 /* for tests */
ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_repcodeResolution, ZSTD_ps_enable);
#endif
assert(8 + nbSeqs * sizeof(ZSTD_Sequence) == inputSize); (void)inputSize;
(void)dst; (void)dstCapacity;
- (void)payload;
+ (void)payload; (void)blockSize;
- return ZSTD_convertBlockSequences_wBlockDelim(g_zcc, &seqPos, seqs, nbSeqs, blockSize, 0);
+ return ZSTD_convertBlockSequences(g_zcc, seqs, nbSeqs, 0);
}
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index b200317..09572e9 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -3909,35 +3909,21 @@
FUZ_transferLiterals(litBuffer, decompressSize, CNBuffer, srcSize, seqs, nbSeqs);
/* not enough literals: must fail */
- compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize-1, srcSize);
+ compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize-1);
if (!ZSTD_isError(compressedSize)) {
DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: not enough literals provided\n");
goto _output_error;
}
/* too many literals: must fail */
- compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize+1, srcSize);
+ compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize+1);
if (!ZSTD_isError(compressedSize)) {
DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: too many literals provided\n");
goto _output_error;
}
- /* too short srcSize: must fail */
- compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize, srcSize-1);
- if (!ZSTD_isError(compressedSize)) {
- DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: srcSize is too short\n");
- goto _output_error;
- }
-
- /* too large srcSize: must fail */
- compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize, srcSize+1);
- if (!ZSTD_isError(compressedSize)) {
- DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: srcSize is too short\n");
- goto _output_error;
- }
-
/* correct amount of literals: should compress successfully */
- compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, litBuffer, litSize, srcSize);
+ compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, litBuffer, litSize);
if (ZSTD_isError(compressedSize)) {
DISPLAY("Error in ZSTD_compressSequencesAndLiterals()\n");
goto _output_error;