// blob: fe520aaa3ea7341e711ca5864cc89f6804c963b9 [file] [log] [blame]
// Copyright 2011 The Goma Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "compress_util.h"
#include <string.h>
#include "absl/memory/memory.h"
#include "absl/strings/match.h"
#include "absl/strings/string_view.h"
#include "glog/logging.h"
namespace {
#ifdef ENABLE_LZMA
// Default size, in bytes (64 KiB), used for the LZMA decoder's output buffer
// and as the default buffer_size of LZMAOutputStream::Options.
constexpr size_t kDefaultLZMAOutputBufSize = 64 * 1024;
#endif
}  // namespace
namespace devtools_goma {
// Human-readable encoding names, one slot per EncodingType value; this table
// is indexed directly by GetEncodingName().
// NOTE(review): the entries are presumably ordered NO_ENCODING,
// ENCODING_DEFLATE, ENCODING_LZMA2 to match the enum declared in
// compress_util.h -- confirm against the header when adding an encoding.
const char* const kEncodingNames[NUM_ENCODINGS] = {
"no encoding",
"deflate",
"lzma2",
};
// Returns the display name for |type|, looked up in kEncodingNames.
// |type| is expected to lie in [NO_ENCODING, NUM_ENCODINGS); both bounds are
// verified with DCHECKs before the table is indexed.
const char* GetEncodingName(EncodingType type) {
  DCHECK_GE(type, NO_ENCODING);
  DCHECK_LT(type, NUM_ENCODINGS);
  const char* const name = kEncodingNames[type];
  return name;
}
// Picks an encoding from the text of a header value.
//
// The match is a plain substring search: "lzma2" is tested first, so it wins
// when both names are present; "deflate" is tested second. An empty header,
// or one that names neither, yields NO_ENCODING.
EncodingType GetEncodingFromHeader(absl::string_view header) {
  EncodingType encoding = NO_ENCODING;
  if (!header.empty()) {
    if (absl::StrContains(header, "lzma2")) {
      encoding = ENCODING_LZMA2;
    } else if (absl::StrContains(header, "deflate")) {
      encoding = ENCODING_DEFLATE;
    }
  }
  return encoding;
}
#ifdef ENABLE_LZMA
// Builds a decompressing stream that reads compressed bytes from
// |sub_stream| (ownership is taken).
//
// Only the output buffer is allocated here; the liblzma decoder is set up by
// Decode() when the first chunk of input is fetched.
LZMAInputStream::LZMAInputStream(
    std::unique_ptr<ZeroCopyInputStream> sub_stream)
    : sub_stream_(std::move(sub_stream)),
      lzma_context_(LZMA_STREAM_INIT),
      lzma_error_(LZMA_OK),
      byte_count_(0) {
  // Output side: a fresh, completely empty buffer. output_position_ and
  // next_out both start at the beginning, i.e. nothing is pending for Next().
  output_buffer_size_ = kDefaultLZMAOutputBufSize;
  output_buffer_.reset(new uint8_t[output_buffer_size_]);
  uint8_t* const buffer_begin = output_buffer_.get();
  output_position_ = buffer_begin;
  lzma_context_.next_out = buffer_begin;
  lzma_context_.avail_out = output_buffer_size_;

  // Input side: a null next_in is how Decode() recognizes that no input has
  // been fetched yet.
  lzma_context_.next_in = nullptr;
  lzma_context_.avail_in = 0;
}
// Hands the decoder context back to liblzma so it can release whatever it
// allocated for this stream.
LZMAInputStream::~LZMAInputStream() {
  lzma_end(&lzma_context_);
}
// Runs one lzma_code() step that writes into the (rewound) output buffer.
//
// Input handling before the step:
//  * If the previous step succeeded and filled the output buffer completely,
//    the input parameters are left untouched.
//  * Otherwise, if liblzma has no input left, the next chunk is pulled from
//    sub_stream_. On the very first chunk the stream decoder is initialized.
//
// Returns the lzma_code() status, or the decoder-initialization error, or
// LZMA_STREAM_END with next_out set to nullptr when sub_stream_ has no more
// data (Next() uses the null next_out to tell this apart from a real end of
// an LZMA stream).
lzma_ret LZMAInputStream::Decode() {
  const bool output_was_full =
      (lzma_error_ == LZMA_OK) && (lzma_context_.avail_out == 0);
  if (!output_was_full && lzma_context_.avail_in == 0) {
    // Must be sampled before next_in is overwritten below.
    const bool is_first_input = (lzma_context_.next_in == nullptr);

    const void* chunk = nullptr;
    int chunk_size = 0;
    if (!sub_stream_->Next(&chunk, &chunk_size)) {
      lzma_context_.next_out = nullptr;
      lzma_context_.avail_out = 0;
      return LZMA_STREAM_END;
    }
    lzma_context_.next_in = static_cast<const uint8_t*>(chunk);
    lzma_context_.avail_in = chunk_size;

    if (is_first_input) {
      const lzma_ret init_status = lzma_stream_decoder(
          &lzma_context_, lzma_easy_decoder_memusage(9), 0);
      if (init_status != LZMA_OK) {
        return init_status;
      }
    }
  }

  // Rewind the output window to the start of the buffer for this step.
  lzma_context_.next_out = reinterpret_cast<uint8_t*>(output_buffer_.get());
  lzma_context_.avail_out = output_buffer_size_;
  output_position_ = output_buffer_.get();
  return lzma_code(&lzma_context_, LZMA_RUN);
}
// Exposes every decoded byte that has not been handed out yet, i.e. the range
// [output_position_, lzma_context_.next_out), through |data| / |size| and
// marks that range as consumed.
void LZMAInputStream::DoNextOutput(const void** data, int* size) {
  uint8_t* const decoded_end = lzma_context_.next_out;
  *data = output_position_;
  *size = decoded_end - output_position_;
  output_position_ = decoded_end;
}
bool LZMAInputStream::Next(const void** data, int* size) {
bool ok = ((lzma_error_ == LZMA_OK) || (lzma_error_ == LZMA_STREAM_END) ||
(lzma_error_ == LZMA_BUF_ERROR));
if (!ok || (lzma_context_.next_out == nullptr)) {
return false;
}
if (lzma_context_.next_out != output_position_) {
DoNextOutput(data, size);
return true;
}
if (lzma_error_ == LZMA_STREAM_END) {
if (lzma_context_.next_out == nullptr) {
*data = nullptr;
*size = 0;
return false;
} else {
// TODO: consider to use lzma's concatenated stream support?
// sub_stream_ may have concatenated streams to follow.
lzma_end(&lzma_context_);
byte_count_ += lzma_context_.total_out;
lzma_error_ = lzma_stream_decoder(&lzma_context_,
lzma_easy_decoder_memusage(9),
0);
if (lzma_error_ != LZMA_OK) {
return false;
}
}
}
lzma_error_ = Decode();
if (lzma_error_ == LZMA_STREAM_END && lzma_context_.next_out == nullptr) {
// The underlying stream's Next returned false inside Decode.
return false;
}
ok = ((lzma_error_ == LZMA_OK) || (lzma_error_ == LZMA_STREAM_END) ||
(lzma_error_ == LZMA_BUF_ERROR));
if (!ok) {
return false;
}
DoNextOutput(data, size);
return true;
}
void LZMAInputStream::BackUp(int count) {
output_position_ -= count;
CHECK(output_position_ > output_buffer_.get());
}
// Discards |count| bytes of decompressed data.
//
// Chunks are pulled with Next() until one is at least as large as what is
// still to be skipped; the unused tail of that chunk is returned with
// BackUp(). Returns false as soon as Next() fails.
bool LZMAInputStream::Skip(int count) {
  const void* chunk = nullptr;
  int chunk_size = 0;
  for (;;) {
    if (!Next(&chunk, &chunk_size)) {
      return false;
    }
    if (chunk_size >= count) {
      break;
    }
    count -= chunk_size;
  }
  if (chunk_size > count) {
    BackUp(chunk_size - count);
  }
  return true;
}
// Returns the number of decompressed bytes handed to the caller so far.
//
// byte_count_ holds the output of LZMA streams that were already finished
// (see Next()); total_out is what the current decoder has produced. The sum
// is kept in 64 bits so that it is not truncated for large outputs.
int64 LZMAInputStream::ByteCount() const {
  int64 total = byte_count_ + static_cast<int64>(lzma_context_.total_out);
  if (lzma_context_.next_out != nullptr && output_position_ != nullptr) {
    // total_out also counts bytes that sit in the output buffer but have not
    // been returned by Next() yet, or that were pushed back with BackUp():
    // the range [output_position_, next_out). Those are not consumed, so
    // take them off.
    total -= lzma_context_.next_out - output_position_;
  }
  return total;
}
// Default encoder settings: liblzma's default preset, a CRC64 integrity
// check, and an input buffer of kDefaultLZMAOutputBufSize bytes.
LZMAOutputStream::Options::Options()
    : preset(LZMA_PRESET_DEFAULT),
      check(LZMA_CHECK_CRC64),
      buffer_size(kDefaultLZMAOutputBufSize) {}
// Creates a compressing stream that writes to |sub_stream| (ownership is
// taken), using default-constructed Options.
LZMAOutputStream::LZMAOutputStream(
    std::unique_ptr<ZeroCopyOutputStream> sub_stream) {
  Init(std::move(sub_stream), Options());
}
// Creates a compressing stream that writes to |sub_stream| (ownership is
// taken), configured by the caller-supplied |options|.
LZMAOutputStream::LZMAOutputStream(
    std::unique_ptr<ZeroCopyOutputStream> sub_stream,
    const Options& options) {
  Init(std::move(sub_stream), options);
}
// Hands the encoder context back to liblzma so it can release whatever it
// allocated. This does not call Close().
LZMAOutputStream::~LZMAOutputStream() {
  lzma_end(&lzma_context_);
}
void LZMAOutputStream::Init(std::unique_ptr<ZeroCopyOutputStream> sub_stream,
const Options& options) {
sub_stream_ = std::move(sub_stream);
sub_data_ = nullptr;
sub_data_size_ = 0;
input_buffer_length_ = options.buffer_size;
CHECK_GT(input_buffer_length_, 0);
input_buffer_ = absl::make_unique<uint8_t[]>(input_buffer_length_);
CHECK(input_buffer_ != nullptr);
// LZMA_STREAM_INIT clears all fields, we do not need to clear them by
// ourselves.
lzma_context_ = LZMA_STREAM_INIT;
lzma_error_ = lzma_easy_encoder(&lzma_context_,
options.preset,
options.check);
}
// Feeds the pending input to liblzma with the given |action|, writing the
// compressed bytes straight into buffers obtained from sub_stream_.
//
// lzma_code() is repeated for as long as it reports LZMA_OK and has filled
// the current destination buffer. For LZMA_FULL_FLUSH and LZMA_FINISH the
// unused tail of the destination buffer is given back to sub_stream_
// afterwards and the buffer is released.
//
// Returns the last lzma_code() status, or LZMA_BUF_ERROR if sub_stream_ could
// not provide another buffer.
lzma_ret LZMAOutputStream::Encode(lzma_action action) {
  lzma_ret status = LZMA_OK;
  for (;;) {
    const bool need_buffer =
        (sub_data_ == nullptr) || (lzma_context_.avail_out == 0);
    if (need_buffer) {
      if (!sub_stream_->Next(&sub_data_, &sub_data_size_)) {
        sub_data_ = nullptr;
        sub_data_size_ = 0;
        return LZMA_BUF_ERROR;
      }
      CHECK_GT(sub_data_size_, 0);
      lzma_context_.next_out = static_cast<uint8_t*>(sub_data_);
      lzma_context_.avail_out = sub_data_size_;
    }

    status = lzma_code(&lzma_context_, action);
    if (status != LZMA_OK || lzma_context_.avail_out != 0) {
      break;
    }
  }

  const bool flushing = (action == LZMA_FULL_FLUSH) || (action == LZMA_FINISH);
  if (flushing) {
    // Notify lower layer of data.
    sub_stream_->BackUp(lzma_context_.avail_out);
    // We don't own the buffer any more.
    sub_data_ = nullptr;
    sub_data_size_ = 0;
  }
  return status;
}
// Hands the caller a buffer to fill with uncompressed bytes.
//
// Any input still pending from the previous buffer is encoded first. On
// success returns true with |data| / |size| describing the whole internal
// input buffer. Returns false, leaving |data| / |size| untouched, when
//  * the stream is already in a failed state, i.e. lzma_error_ is neither
//    LZMA_OK nor LZMA_BUF_ERROR (the same rule Close() applies),
//  * encoding the pending input fails, or
//  * liblzma unexpectedly left input unconsumed.
bool LZMAOutputStream::Next(void** data, int* size) {
  if (lzma_error_ != LZMA_OK && lzma_error_ != LZMA_BUF_ERROR) {
    return false;
  }
  if (lzma_context_.avail_in != 0) {
    lzma_error_ = Encode(LZMA_RUN);
    if (lzma_error_ != LZMA_OK) {
      return false;
    }
  }
  if (lzma_context_.avail_in != 0) {
    // The input buffer cannot be reused while liblzma still has to read from
    // it, and the out-parameters must not be reported as valid when they
    // were never set.
    LOG(DFATAL) << "lzma left bytes unconsumed";
    return false;
  }

  VLOG(3) << "updated avail_in"
          << " size=" << input_buffer_length_;
  // The whole buffer is registered as pending input right away; BackUp()
  // trims whatever the caller did not actually write.
  lzma_context_.next_in = input_buffer_.get();
  lzma_context_.avail_in = input_buffer_length_;
  *data = input_buffer_.get();
  *size = input_buffer_length_;
  return true;
}
// Tells the stream that the last |count| bytes of the buffer returned by
// Next() were not written by the caller, so they are removed from the pending
// input. |count| may not exceed the amount of pending input (CHECKed).
void LZMAOutputStream::BackUp(int count) {
  CHECK_GE(lzma_context_.avail_in, count);
  lzma_context_.avail_in -= count;
}
// Returns the number of uncompressed bytes accepted so far: what liblzma has
// already read (total_in) plus what is still pending in the input buffer
// (avail_in).
int64 LZMAOutputStream::ByteCount() const {
  const auto already_encoded = lzma_context_.total_in;
  const auto still_pending = lzma_context_.avail_in;
  return already_encoded + still_pending;
}
// Finishes the LZMA stream: keeps calling Encode(LZMA_FINISH) while it
// reports LZMA_OK, and returns true only if the encoder ends with
// LZMA_STREAM_END. Returns false immediately when the stream is already in an
// error state other than LZMA_BUF_ERROR.
bool LZMAOutputStream::Close() {
  const bool usable =
      (lzma_error_ == LZMA_OK) || (lzma_error_ == LZMA_BUF_ERROR);
  if (!usable) {
    return false;
  }
  for (;;) {
    lzma_error_ = Encode(LZMA_FINISH);
    if (lzma_error_ != LZMA_OK) {
      break;
    }
  }
  return lzma_error_ == LZMA_STREAM_END;
}
#endif
// Builds an inflating stream on top of |sub_stream| (ownership is taken).
//
// A fixed two-byte zlib header (0x78 0x01) is placed in front of the content
// via a ConcatenatingInputStream, and the result is decoded by a
// GzipInputStream in ZLIB mode.
// NOTE(review): presumably |sub_stream| carries deflate data without its own
// zlib header -- confirm against the callers.
InflateInputStream::InflateInputStream(
    std::unique_ptr<ZeroCopyInputStream> sub_stream)
    : zlib_content_(std::move(sub_stream)) {
  // see chrome/src/net/filter/gzip_source_stream.cc InsertZlibHeader.
  static const char kZlibHeader[2] = {0x78, 0x01};
  zlib_header_ = absl::make_unique<ArrayInputStream>(kZlibHeader, 2);

  // Header first, then the real content.
  sub_stream_inputs_.push_back(zlib_header_.get());
  sub_stream_inputs_.push_back(zlib_content_.get());
  sub_stream_ = absl::make_unique<ConcatenatingInputStream>(
      &sub_stream_inputs_[0], sub_stream_inputs_.size());

  zlib_stream_ = absl::make_unique<GzipInputStream>(sub_stream_.get(),
                                                    GzipInputStream::ZLIB);
}
} // namespace devtools_goma