Lib/compression/zstd/__init__.py - external/github.com/python/cpython - Git at Google

 """Python bindings to the Zstandard (zstd) compression library (RFC-8878)."""

 # Names exported by ``from compression.zstd import *``, grouped by the
 # module that provides each one.
 # NOTE(review): FrameInfo, the type returned by get_frame_info, is not
 # listed here -- confirm that keeping it out of __all__ is intended.
 __all__ = (
     # compression.zstd: defined in this module
     'COMPRESSION_LEVEL_DEFAULT',
     'compress',
     'CompressionParameter',
     'decompress',
     'DecompressionParameter',
     'finalize_dict',
     'get_frame_info',
     'Strategy',
     'train_dict',

     # compression.zstd._zstdfile: re-exported from that submodule
     'open',
     'ZstdFile',

     # _zstd: re-exported from the extension module, except
     # zstd_version_info, which this module derives from
     # _zstd.zstd_version_number
     'get_frame_size',
     'zstd_version',
     'zstd_version_info',
     'ZstdCompressor',
     'ZstdDecompressor',
     'ZstdDict',
     'ZstdError',
 )

 import _zstd
 import enum
 from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError,
                    get_frame_size, zstd_version)
 from compression.zstd._zstdfile import ZstdFile, open, _nbytes

 # _zstd reports the library version packed into a single integer:
 #     MAJOR * 100 * 100 + MINOR * 100 + RELEASE
 # Unpack it into a (MAJOR, MINOR, RELEASE) tuple.
 zstd_version_info = (_zstd.zstd_version_number // 10000,
                      (_zstd.zstd_version_number // 100) % 100,
                      _zstd.zstd_version_number % 100)
 """Version number of the runtime zstd library as a tuple of integers."""

 # Re-export the extension module's default level under a public name.
 COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT
 """The default compression level for Zstandard, currently '3'."""


 class FrameInfo:
     """Information about a Zstandard frame.

     Instances are read-only records with two attributes,
     'decompressed_size' and 'dictionary_id'. Both are stored once by
     __init__; any later attempt to assign or delete an attribute raises
     AttributeError.
     """

     __slots__ = 'decompressed_size', 'dictionary_id'

     def __init__(self, decompressed_size, dictionary_id):
         # Store through the base class: this class's own __setattr__
         # rejects every assignment.
         super().__setattr__('decompressed_size', decompressed_size)
         super().__setattr__('dictionary_id', dictionary_id)

     def __repr__(self):
         return (f'FrameInfo(decompressed_size={self.decompressed_size}, '
                 f'dictionary_id={self.dictionary_id})')

     def __setattr__(self, name, _):
         raise AttributeError(f"can't set attribute {name!r}")

     def __delattr__(self, name):
         # Without this, ``del info.dictionary_id`` would empty the slot and
         # leave an object whose repr() fails, despite the read-only
         # contract enforced by __setattr__.
         raise AttributeError(f"can't delete attribute {name!r}")


 def get_frame_info(frame_buffer):
     """Read the header of a Zstandard frame and return a FrameInfo.

     *frame_buffer* is a bytes-like object. It should begin at the start of
     a frame and must contain at least the frame header (6 to 18 bytes).

     The returned FrameInfo object has two attributes.
     'decompressed_size' is the size in bytes of the frame's data once
     decompressed, or None when that size is unknown.
     'dictionary_id' is a non-negative int below 2**32. The special value 0
     means that the dictionary ID was not recorded in the frame header:
     the frame may or may not need a dictionary to be decoded, and the ID
     of such a dictionary is not specified.
     """
     # The extension module returns the values in FrameInfo's argument order.
     header_fields = _zstd.get_frame_info(frame_buffer)
     return FrameInfo(*header_fields)


 def train_dict(samples, dict_size):
     """Train a Zstandard dictionary on *samples* and return it as a ZstdDict.

     *samples* is an iterable of samples, where each sample is a bytes-like
     object representing a file.

     *dict_size* is the maximum size of the dictionary, in bytes.

     Raises TypeError if *dict_size* is not an int, and ValueError if the
     samples contain no data.
     """
     if not isinstance(dict_size, int):
         ds_cls = type(dict_size).__qualname__
         raise TypeError(f'dict_size must be an int object, not {ds_cls!r}.')

     # Materialise the iterable once; it is traversed twice below.
     sample_seq = tuple(samples)
     # The trainer receives one contiguous buffer plus the size of each
     # sample inside it.
     joined = b''.join(sample_seq)
     sizes = tuple(_nbytes(item) for item in sample_seq)
     if not joined:
         raise ValueError("samples contained no data; can't train dictionary.")
     return ZstdDict(_zstd.train_dict(joined, sizes, dict_size))


 def finalize_dict(zstd_dict, /, samples, dict_size, level):
     """Finalize a basis dictionary and return the result as a ZstdDict.

     Starting from custom content used as the basis for a dictionary, and a
     set of samples, finalize *zstd_dict* by adding headers and statistics
     according to the Zstandard dictionary format.

     The basis dictionary content may be composed by hand, with the samples
     used only to finalize it. The basis dictionary may be a "raw content"
     dictionary. See *is_raw* in ZstdDict.

     *samples* is an iterable of samples, where each sample is a bytes-like
     object representing a file.
     *dict_size* is the maximum size of the dictionary, in bytes.
     *level* is the expected compression level. The statistics differ from
     one compression level to another, so tuning the dictionary to the
     level can provide improvements.

     Raises TypeError if *zstd_dict* is not a ZstdDict or if *dict_size* or
     *level* is not an int, and ValueError if the samples contain no data.
     """
     # Checked in this order, so the first failing argument decides the
     # error message.
     type_checks = (
         (zstd_dict, ZstdDict,
          'zstd_dict argument should be a ZstdDict object.'),
         (dict_size, int, 'dict_size argument should be an int object.'),
         (level, int, 'level argument should be an int object.'),
     )
     for value, expected_type, message in type_checks:
         if not isinstance(value, expected_type):
             raise TypeError(message)

     # Materialise the iterable once; it is traversed twice below.
     sample_seq = tuple(samples)
     joined = b''.join(sample_seq)
     sizes = tuple(_nbytes(item) for item in sample_seq)
     if not joined:
         raise ValueError(
             "The samples are empty content, can't finalize the dictionary.")
     finalized = _zstd.finalize_dict(zstd_dict.dict_content, joined, sizes,
                                     dict_size, level)
     return ZstdDict(finalized)


 def compress(data, level=None, options=None, zstd_dict=None):
     """Compress *data* in a single call and return the result as bytes.

     *level* is an int giving the compression level to use; it defaults to
     COMPRESSION_LEVEL_DEFAULT ('3').
     *options* is a dict object holding advanced compression parameters.
     See CompressionParameter for more on options.
     *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary.
     See the function train_dict for how to train a ZstdDict on sample data.

     For incremental compression, use a ZstdCompressor instead.
     """
     # A fresh compressor per call; all of *data* goes through it at once
     # using the FLUSH_FRAME mode.
     return ZstdCompressor(
         level=level, options=options, zstd_dict=zstd_dict,
     ).compress(data, mode=ZstdCompressor.FLUSH_FRAME)


 def decompress(data, zstd_dict=None, options=None):
     """Decompress *data*, which holds one or more Zstandard frames.

     *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary.
     See the function train_dict for how to train a ZstdDict on sample data.
     *options* is a dict object holding advanced decompression parameters.
     See DecompressionParameter for more on options.

     Raises ZstdError if the data ends before a frame's end-of-stream
     marker is reached.

     For incremental decompression, use a ZstdDecompressor instead.
     """
     pieces = []
     remaining = data
     while True:
         # One decompressor per frame: whatever it leaves in unused_data is
         # fed to a new decompressor on the next pass.
         decoder = ZstdDecompressor(options=options, zstd_dict=zstd_dict)
         pieces.append(decoder.decompress(remaining))
         if not decoder.eof:
             raise ZstdError('Compressed data ended before the '
                             'end-of-stream marker was reached')
         remaining = decoder.unused_data
         if not remaining:
             return b''.join(pieces)


 class CompressionParameter(enum.IntEnum):
     """Compression parameters.

     Each member's value is the matching ZSTD_c_* constant exported by the
     _zstd extension module.
     """

     # NOTE(review): the blank-line grouping below presumably follows the
     # grouping of the ZSTD_c_* parameters in the zstd library -- confirm.
     compression_level = _zstd.ZSTD_c_compressionLevel
     window_log = _zstd.ZSTD_c_windowLog
     hash_log = _zstd.ZSTD_c_hashLog
     chain_log = _zstd.ZSTD_c_chainLog
     search_log = _zstd.ZSTD_c_searchLog
     min_match = _zstd.ZSTD_c_minMatch
     target_length = _zstd.ZSTD_c_targetLength
     strategy = _zstd.ZSTD_c_strategy

     # Long-distance-matching ("ldm") parameters.
     enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching
     ldm_hash_log = _zstd.ZSTD_c_ldmHashLog
     ldm_min_match = _zstd.ZSTD_c_ldmMinMatch
     ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog
     ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog

     # Flag parameters.
     content_size_flag = _zstd.ZSTD_c_contentSizeFlag
     checksum_flag = _zstd.ZSTD_c_checksumFlag
     dict_id_flag = _zstd.ZSTD_c_dictIDFlag

     # Worker/job parameters -- presumably multi-threading related; confirm.
     nb_workers = _zstd.ZSTD_c_nbWorkers
     job_size = _zstd.ZSTD_c_jobSize
     overlap_log = _zstd.ZSTD_c_overlapLog

     def bounds(self):
         """Return the (lower, upper) int bounds of a compression parameter.

         Both the lower and upper bounds are inclusive.

         The lookup is delegated to _zstd.get_param_bounds, called with this
         member's value and is_compress=True.
         """
         return _zstd.get_param_bounds(self.value, is_compress=True)


 class DecompressionParameter(enum.IntEnum):
     """Decompression parameters.

     Each member's value is the matching ZSTD_d_* constant exported by the
     _zstd extension module.
     """

     window_log_max = _zstd.ZSTD_d_windowLogMax

     def bounds(self):
         """Return the (lower, upper) int bounds of a decompression parameter.

         Both the lower and upper bounds are inclusive.

         The lookup is delegated to _zstd.get_param_bounds, called with this
         member's value and is_compress=False.
         """
         return _zstd.get_param_bounds(self.value, is_compress=False)


 class Strategy(enum.IntEnum):
     """Compression strategies, listed from fastest to strongest.

     Note that new strategies might be added in the future.
     Only the order (from fast to strong) is guaranteed,
     the numeric value might change.

     Each member's value is the ZSTD_* constant of the same name exported by
     the _zstd extension module.
     """

     # NOTE(review): presumably these are the values accepted for
     # CompressionParameter.strategy -- confirm.
     fast = _zstd.ZSTD_fast
     dfast = _zstd.ZSTD_dfast
     greedy = _zstd.ZSTD_greedy
     lazy = _zstd.ZSTD_lazy
     lazy2 = _zstd.ZSTD_lazy2
     btlazy2 = _zstd.ZSTD_btlazy2
     btopt = _zstd.ZSTD_btopt
     btultra = _zstd.ZSTD_btultra
     btultra2 = _zstd.ZSTD_btultra2


 # Pass both parameter enum classes to the _zstd extension module, which
 # checks the validity of the CompressionParameter and
 # DecompressionParameter types.  Runs once, when this module is imported.
 _zstd.set_parameter_types(CompressionParameter, DecompressionParameter)
	"""Python bindings to the Zstandard (zstd) compression library (RFC-8878)."""

	# Names exported by ``from compression.zstd import *``, grouped by the
	# module that provides each one.
	# NOTE(review): FrameInfo, the type returned by get_frame_info, is not
	# listed here -- confirm that keeping it out of __all__ is intended.
	__all__ = (
	# compression.zstd: defined in this module
	'COMPRESSION_LEVEL_DEFAULT',
	'compress',
	'CompressionParameter',
	'decompress',
	'DecompressionParameter',
	'finalize_dict',
	'get_frame_info',
	'Strategy',
	'train_dict',

	# compression.zstd._zstdfile: re-exported from that submodule
	'open',
	'ZstdFile',

	# _zstd: re-exported from the extension module, except
	# zstd_version_info, which this module derives from
	# _zstd.zstd_version_number
	'get_frame_size',
	'zstd_version',
	'zstd_version_info',
	'ZstdCompressor',
	'ZstdDecompressor',
	'ZstdDict',
	'ZstdError',
	)

	import _zstd
	import enum
	from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError,
	get_frame_size, zstd_version)
	from compression.zstd._zstdfile import ZstdFile, open, _nbytes

	# _zstd reports the library version packed into a single integer:
	#     MAJOR * 100 * 100 + MINOR * 100 + RELEASE
	# Unpack it into a (MAJOR, MINOR, RELEASE) tuple.
	zstd_version_info = (_zstd.zstd_version_number // 10000,
	                     (_zstd.zstd_version_number // 100) % 100,
	                     _zstd.zstd_version_number % 100)
	"""Version number of the runtime zstd library as a tuple of integers."""

	# Re-export the extension module's default level under a public name.
	COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT
	"""The default compression level for Zstandard, currently '3'."""


	class FrameInfo:
	    """Information about a Zstandard frame.

	    Instances are read-only records with two attributes,
	    'decompressed_size' and 'dictionary_id'. Both are stored once by
	    __init__; any later attempt to assign or delete an attribute raises
	    AttributeError.
	    """

	    __slots__ = 'decompressed_size', 'dictionary_id'

	    def __init__(self, decompressed_size, dictionary_id):
	        # Store through the base class: this class's own __setattr__
	        # rejects every assignment.
	        super().__setattr__('decompressed_size', decompressed_size)
	        super().__setattr__('dictionary_id', dictionary_id)

	    def __repr__(self):
	        return (f'FrameInfo(decompressed_size={self.decompressed_size}, '
	                f'dictionary_id={self.dictionary_id})')

	    def __setattr__(self, name, _):
	        raise AttributeError(f"can't set attribute {name!r}")

	    def __delattr__(self, name):
	        # Without this, ``del info.dictionary_id`` would empty the slot
	        # and leave an object whose repr() fails, despite the read-only
	        # contract enforced by __setattr__.
	        raise AttributeError(f"can't delete attribute {name!r}")


	def get_frame_info(frame_buffer):
	    """Read the header of a Zstandard frame and return a FrameInfo.

	    *frame_buffer* is a bytes-like object. It should begin at the start
	    of a frame and must contain at least the frame header (6 to 18
	    bytes).

	    The returned FrameInfo object has two attributes.
	    'decompressed_size' is the size in bytes of the frame's data once
	    decompressed, or None when that size is unknown.
	    'dictionary_id' is a non-negative int below 2**32. The special value
	    0 means that the dictionary ID was not recorded in the frame header:
	    the frame may or may not need a dictionary to be decoded, and the ID
	    of such a dictionary is not specified.
	    """
	    # The extension module returns the values in FrameInfo's argument
	    # order.
	    header_fields = _zstd.get_frame_info(frame_buffer)
	    return FrameInfo(*header_fields)


	def train_dict(samples, dict_size):
	    """Train a Zstandard dictionary on *samples*; return it as a ZstdDict.

	    *samples* is an iterable of samples, where each sample is a
	    bytes-like object representing a file.

	    *dict_size* is the maximum size of the dictionary, in bytes.

	    Raises TypeError if *dict_size* is not an int, and ValueError if the
	    samples contain no data.
	    """
	    if not isinstance(dict_size, int):
	        ds_cls = type(dict_size).__qualname__
	        raise TypeError(f'dict_size must be an int object, not {ds_cls!r}.')

	    # Materialise the iterable once; it is traversed twice below.
	    samples = tuple(samples)
	    # The trainer receives one contiguous buffer plus the size of each
	    # sample inside it.
	    chunks = b''.join(samples)
	    chunk_sizes = tuple(_nbytes(sample) for sample in samples)
	    if not chunks:
	        raise ValueError("samples contained no data; can't train dictionary.")
	    dict_content = _zstd.train_dict(chunks, chunk_sizes, dict_size)
	    return ZstdDict(dict_content)


	def finalize_dict(zstd_dict, /, samples, dict_size, level):
	    """Finalize a basis dictionary and return the result as a ZstdDict.

	    Starting from custom content used as the basis for a dictionary, and
	    a set of samples, finalize *zstd_dict* by adding headers and
	    statistics according to the Zstandard dictionary format.

	    The basis dictionary content may be composed by hand, with the
	    samples used only to finalize it. The basis dictionary may be a
	    "raw content" dictionary. See *is_raw* in ZstdDict.

	    *samples* is an iterable of samples, where each sample is a
	    bytes-like object representing a file.
	    *dict_size* is the maximum size of the dictionary, in bytes.
	    *level* is the expected compression level. The statistics differ
	    from one compression level to another, so tuning the dictionary to
	    the level can provide improvements.

	    Raises TypeError if *zstd_dict* is not a ZstdDict or if *dict_size*
	    or *level* is not an int, and ValueError if the samples contain no
	    data.
	    """
	    if not isinstance(zstd_dict, ZstdDict):
	        raise TypeError('zstd_dict argument should be a ZstdDict object.')
	    if not isinstance(dict_size, int):
	        raise TypeError('dict_size argument should be an int object.')
	    if not isinstance(level, int):
	        raise TypeError('level argument should be an int object.')

	    # Materialise the iterable once; it is traversed twice below.
	    samples = tuple(samples)
	    chunks = b''.join(samples)
	    chunk_sizes = tuple(_nbytes(sample) for sample in samples)
	    if not chunks:
	        raise ValueError("The samples are empty content, can't finalize the "
	                         "dictionary.")
	    dict_content = _zstd.finalize_dict(zstd_dict.dict_content, chunks,
	                                       chunk_sizes, dict_size, level)
	    return ZstdDict(dict_content)


	def compress(data, level=None, options=None, zstd_dict=None):
	    """Compress *data* in a single call and return the result as bytes.

	    *level* is an int giving the compression level to use; it defaults
	    to COMPRESSION_LEVEL_DEFAULT ('3').
	    *options* is a dict object holding advanced compression parameters.
	    See CompressionParameter for more on options.
	    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary.
	    See the function train_dict for how to train a ZstdDict on sample
	    data.

	    For incremental compression, use a ZstdCompressor instead.
	    """
	    # A fresh compressor per call; all of *data* goes through it at once
	    # using the FLUSH_FRAME mode.
	    comp = ZstdCompressor(level=level, options=options, zstd_dict=zstd_dict)
	    return comp.compress(data, mode=ZstdCompressor.FLUSH_FRAME)


	def decompress(data, zstd_dict=None, options=None):
	    """Decompress *data*, which holds one or more Zstandard frames.

	    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary.
	    See the function train_dict for how to train a ZstdDict on sample
	    data.
	    *options* is a dict object holding advanced decompression
	    parameters. See DecompressionParameter for more on options.

	    Raises ZstdError if the data ends before a frame's end-of-stream
	    marker is reached.

	    For incremental decompression, use a ZstdDecompressor instead.
	    """
	    results = []
	    while True:
	        # One decompressor per frame: whatever it leaves in unused_data
	        # is fed to a new decompressor on the next pass.
	        decomp = ZstdDecompressor(options=options, zstd_dict=zstd_dict)
	        results.append(decomp.decompress(data))
	        if not decomp.eof:
	            raise ZstdError('Compressed data ended before the '
	                            'end-of-stream marker was reached')
	        data = decomp.unused_data
	        if not data:
	            break
	    return b''.join(results)


	class CompressionParameter(enum.IntEnum):
	    """Compression parameters.

	    Each member's value is the matching ZSTD_c_* constant exported by the
	    _zstd extension module.
	    """

	    # NOTE(review): the blank-line grouping below presumably follows the
	    # grouping of the ZSTD_c_* parameters in the zstd library -- confirm.
	    compression_level = _zstd.ZSTD_c_compressionLevel
	    window_log = _zstd.ZSTD_c_windowLog
	    hash_log = _zstd.ZSTD_c_hashLog
	    chain_log = _zstd.ZSTD_c_chainLog
	    search_log = _zstd.ZSTD_c_searchLog
	    min_match = _zstd.ZSTD_c_minMatch
	    target_length = _zstd.ZSTD_c_targetLength
	    strategy = _zstd.ZSTD_c_strategy

	    # Long-distance-matching ("ldm") parameters.
	    enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching
	    ldm_hash_log = _zstd.ZSTD_c_ldmHashLog
	    ldm_min_match = _zstd.ZSTD_c_ldmMinMatch
	    ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog
	    ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog

	    # Flag parameters.
	    content_size_flag = _zstd.ZSTD_c_contentSizeFlag
	    checksum_flag = _zstd.ZSTD_c_checksumFlag
	    dict_id_flag = _zstd.ZSTD_c_dictIDFlag

	    # Worker/job parameters -- presumably multi-threading related; confirm.
	    nb_workers = _zstd.ZSTD_c_nbWorkers
	    job_size = _zstd.ZSTD_c_jobSize
	    overlap_log = _zstd.ZSTD_c_overlapLog

	    def bounds(self):
	        """Return the (lower, upper) int bounds of a compression parameter.

	        Both the lower and upper bounds are inclusive.

	        The lookup is delegated to _zstd.get_param_bounds, called with
	        this member's value and is_compress=True.
	        """
	        return _zstd.get_param_bounds(self.value, is_compress=True)


	class DecompressionParameter(enum.IntEnum):
	    """Decompression parameters.

	    Each member's value is the matching ZSTD_d_* constant exported by the
	    _zstd extension module.
	    """

	    window_log_max = _zstd.ZSTD_d_windowLogMax

	    def bounds(self):
	        """Return the (lower, upper) int bounds of a decompression parameter.

	        Both the lower and upper bounds are inclusive.

	        The lookup is delegated to _zstd.get_param_bounds, called with
	        this member's value and is_compress=False.
	        """
	        return _zstd.get_param_bounds(self.value, is_compress=False)


	class Strategy(enum.IntEnum):
	    """Compression strategies, listed from fastest to strongest.

	    Note that new strategies might be added in the future.
	    Only the order (from fast to strong) is guaranteed,
	    the numeric value might change.

	    Each member's value is the ZSTD_* constant of the same name exported
	    by the _zstd extension module.
	    """

	    # NOTE(review): presumably these are the values accepted for
	    # CompressionParameter.strategy -- confirm.
	    fast = _zstd.ZSTD_fast
	    dfast = _zstd.ZSTD_dfast
	    greedy = _zstd.ZSTD_greedy
	    lazy = _zstd.ZSTD_lazy
	    lazy2 = _zstd.ZSTD_lazy2
	    btlazy2 = _zstd.ZSTD_btlazy2
	    btopt = _zstd.ZSTD_btopt
	    btultra = _zstd.ZSTD_btultra
	    btultra2 = _zstd.ZSTD_btultra2


	# Pass both parameter enum classes to the _zstd extension module, which
	# checks the validity of the CompressionParameter and
	# DecompressionParameter types.  Runs once, when this module is imported.
	_zstd.set_parameter_types(CompressionParameter, DecompressionParameter)