// blob: 7fe5a6a3d0989e0980924f5d0129b7ffb7c95f24 [file] [log] [blame]
// Copyright 2007 The open-vcdiff Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Classes to implement an Encoder for the format described in
// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
//
// The RFC describes the possibility of using a secondary compressor
// to further reduce the size of each section of the VCDIFF output.
// That feature is not supported in this implementation of the encoder
// and decoder.
// No secondary compressor types have been publicly registered with
// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
// in the more than five years since the registry was created, so there
// is no standard set of compressor IDs which would be generated by other
// encoders or accepted by other decoders.
#include <config.h>
#include "checksum.h"
#include "google/encodetable.h"
#include "google/output_string.h"
#include "google/vcencoder.h"
#include "google/jsonwriter.h"
#include "logging.h"
#include "unique_ptr.h" // auto_ptr, unique_ptr
#include "vcdiffengine.h"
namespace open_vcdiff {
namespace {
// Builds the CodeTableWriter used when the caller does not supply one.
// The flags decide which writer type is created.  The returned object is
// allocated with new, so the caller takes ownership of it.
CodeTableWriterInterface* create_writer(
    VCDiffFormatExtensionFlags format_extensions) {
  if ((format_extensions & VCD_FORMAT_JSON) == 0) {
    // This implementation of the encoder uses the default
    // code table.  A VCDiffCodeTableWriter could also be constructed
    // using a custom code table.
    const bool interleaved =
        (format_extensions & VCD_FORMAT_INTERLEAVED) != 0;
    return new VCDiffCodeTableWriter(interleaved);
  }
  // The JSON flag is set, so the JSON writer is used instead.
  return new JSONCodeTableWriter();
}
} // namespace
// Allocates a VCDiffEngine for the given dictionary bytes.  Construction
// only creates the engine object; Init() must be called separately to
// initialize it.
HashedDictionary::HashedDictionary(const char* dictionary_contents,
                                   size_t dictionary_size)
    : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) {
}
// Frees the engine that the constructor allocated.
HashedDictionary::~HashedDictionary() {
  delete engine_;
}
bool HashedDictionary::Init() {
return const_cast<VCDiffEngine*>(engine_)->Init();
}
// Private implementation behind VCDiffStreamingEncoder, which forwards each
// of its encoding calls to an instance of this class.
class VCDiffStreamingEncoderImpl {
 public:
  VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
                             VCDiffFormatExtensionFlags format_extensions,
                             bool look_for_target_matches,
                             CodeTableWriterInterface* writer);

  // The three operations below are identical to their counterparts
  // in VCDiffStreamingEncoder.
  bool StartEncoding(OutputStringInterface* out);
  bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
  bool FinishEncoding(OutputStringInterface* out);

 private:
  // Declared private (and never called) so that no implicit copy
  // constructor or assignment operator is available.
  VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
  void operator=(const VCDiffStreamingEncoderImpl&);

  // Engine taken from the HashedDictionary given to the constructor.
  // This class never deletes it.
  const VCDiffEngine* engine_;

  // Code table writer that receives the encoded output.  It is held in a
  // UNIQUE_PTR rather than as a raw pointer.
  UNIQUE_PTR<CodeTableWriterInterface> coder_;

  // Format flags passed to the writer's header and consulted to decide
  // whether a checksum is added for each chunk.
  const VCDiffFormatExtensionFlags format_extensions_;

  // Selects whether matches are also searched for within the previously
  // encoded target data, or only within the source (dictionary) data.
  // See vcencoder.h for a full explanation of this parameter.
  const bool look_for_target_matches_;

  // Guards the call order of StartEncoding(), EncodeChunk(), and
  // FinishEncoding().  True once StartEncoding() has been called and
  // through any number of EncodeChunk() calls, until FinishEncoding() is
  // called.  False initially, and again after FinishEncoding().
  bool encode_chunk_allowed_;
};
// Stores the dictionary's engine, the writer, and the encoding options.
// The object starts with no encoding session active, so EncodeChunk() and
// FinishEncoding() are rejected until StartEncoding() has been called.
// `dictionary` is dereferenced unconditionally and must not be null.
inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
    const HashedDictionary* dictionary,
    VCDiffFormatExtensionFlags format_extensions,
    bool look_for_target_matches,
    CodeTableWriterInterface* writer)
    : engine_(dictionary->engine()),
      coder_(writer),
      format_extensions_(format_extensions),
      look_for_target_matches_(look_for_target_matches),
      encode_chunk_allowed_(false) {
}
// Begins an encoding session: initializes the code table writer with the
// dictionary size, asks the writer to verify the dictionary, and writes the
// header to `out`.
//
// Returns true on success, after which EncodeChunk() and FinishEncoding()
// are permitted.  Returns false if the writer fails to initialize or
// rejects the dictionary; in that case no session is active afterwards.
inline bool VCDiffStreamingEncoderImpl::StartEncoding(
    OutputStringInterface* out) {
  // A session left open by an earlier StartEncoding() must not stay usable
  // if this attempt fails partway: the writer may have been re-initialized
  // only partially.  Clear the flag first and set it again only once every
  // step below has succeeded.
  encode_chunk_allowed_ = false;
  if (!coder_->Init(engine_->dictionary_size())) {
    VCD_DFATAL << "Internal error: "
                  "Initialization of code table writer failed" << VCD_ENDL;
    return false;
  }
  if (!coder_->VerifyDictionary(engine_->dictionary(),
                                engine_->dictionary_size())) {
    VCD_ERROR << "Dictionary not valid for writer" << VCD_ENDL;
    return false;
  }
  coder_->WriteHeader(out, format_extensions_);
  encode_chunk_allowed_ = true;
  return true;
}
// Encodes one chunk of target data (`len` bytes starting at `data`) through
// the engine and the code table writer, passing `out` to the engine.
//
// Returns false, without encoding anything, if no session is active or if
// the writer rejects the chunk.  Returns true otherwise.
inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
    const char* data,
    size_t len,
    OutputStringInterface* out) {
  const bool session_active = encode_chunk_allowed_;
  if (!session_active) {
    VCD_ERROR << "EncodeChunk called before StartEncoding" << VCD_ENDL;
    return false;
  }

  const bool chunk_accepted = coder_->VerifyChunk(data, len);
  if (!chunk_accepted) {
    VCD_ERROR << "Target chunk not valid for writer" << VCD_ENDL;
    return false;
  }

  // The Adler32 checksum of the chunk is added only when the checksum
  // format extension was requested.
  const bool checksum_requested =
      (format_extensions_ & VCD_FORMAT_CHECKSUM) != 0;
  if (checksum_requested) {
    coder_->AddChecksum(ComputeAdler32(data, len));
  }

  engine_->Encode(data, len, look_for_target_matches_, out, coder_.get());
  return true;
}
// Ends the current encoding session and lets the writer finish its output
// via `out`.  Returns false if no session is active, true otherwise.
inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
    OutputStringInterface* out) {
  if (encode_chunk_allowed_) {
    // Close the session before handing control to the writer, so that
    // further EncodeChunk() calls are rejected until the next
    // StartEncoding().
    encode_chunk_allowed_ = false;
    coder_->FinishEncoding(out);
    return true;
  }
  VCD_ERROR << "FinishEncoding called before StartEncoding" << VCD_ENDL;
  return false;
}
// Creates a streaming encoder that uses the default code table writer for
// the requested format extensions (see create_writer).
VCDiffStreamingEncoder::VCDiffStreamingEncoder(
    const HashedDictionary* dictionary,
    VCDiffFormatExtensionFlags format_extensions,
    bool look_for_target_matches)
    : impl_(new VCDiffStreamingEncoderImpl(
          dictionary,
          format_extensions,
          look_for_target_matches,
          create_writer(format_extensions))) {
}
// Creates a streaming encoder that uses a caller-supplied code table
// writer.  The writer pointer is handed to the implementation object,
// which stores it in a UNIQUE_PTR.
VCDiffStreamingEncoder::VCDiffStreamingEncoder(
    const HashedDictionary* dictionary,
    VCDiffFormatExtensionFlags format_extensions,
    bool look_for_target_matches,
    CodeTableWriterInterface* writer)
    : impl_(new VCDiffStreamingEncoderImpl(
          dictionary,
          format_extensions,
          look_for_target_matches,
          writer)) {
}
// Destroys the implementation object allocated by the constructors.
VCDiffStreamingEncoder::~VCDiffStreamingEncoder() {
  delete impl_;
}
// Forwards to the implementation's StartEncoding() and returns its result.
bool VCDiffStreamingEncoder::StartEncodingToInterface(
    OutputStringInterface* out) {
  const bool started = impl_->StartEncoding(out);
  return started;
}
// Forwards to the implementation's EncodeChunk() and returns its result.
bool VCDiffStreamingEncoder::EncodeChunkToInterface(
    const char* data,
    size_t len,
    OutputStringInterface* out) {
  const bool encoded = impl_->EncodeChunk(data, len, out);
  return encoded;
}
// Forwards to the implementation's FinishEncoding() and returns its result.
bool VCDiffStreamingEncoder::FinishEncodingToInterface(
    OutputStringInterface* out) {
  const bool finished = impl_->FinishEncoding(out);
  return finished;
}
bool VCDiffEncoder::EncodeToInterface(const char* target_data,
size_t target_len,
OutputStringInterface* out) {
out->clear();
if (!encoder_) {
if (!dictionary_.Init()) {
VCD_ERROR << "Error initializing HashedDictionary" << VCD_ENDL;
return false;
}
encoder_ = new VCDiffStreamingEncoder(&dictionary_,
flags_,
look_for_target_matches_);
}
if (!encoder_->StartEncodingToInterface(out)) {
return false;
}
if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
return false;
}
return encoder_->FinishEncodingToInterface(out);
}
} // namespace open_vcdiff