// blob: 344adaf31f23c02a1ea2bf94a90cb30b8fa1870b [file] [log] [blame]
// Copyright 2011 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Streams classes.
//
// These memory-resident streams are used for serializing data into a sequential
// region of memory.
//
// Streams are divided into SourceStreams for reading and SinkStreams for
// writing. Streams are aggregated into Sets which allows several streams to be
// used at once. Example: we can write A1, B1, A2, B2 but achieve the memory
// layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another.
//
// The aggregated streams are important to Courgette's compression efficiency:
// we use them to cluster similar kinds of data, which helps to generate longer
// common subsequences and repeated sequences.
#include "courgette/streams.h"
#include <memory.h>
#include <stddef.h>
#include <stdint.h>
#include "base/logging.h"
namespace courgette {
// Version stamp of the StreamSet serialization format. It is the first value
// written by SinkStreamSet::CopyHeaderTo, and SourceStreamSet::Init rejects
// any input whose leading value differs from it.
// Update this version number if the serialization format of a StreamSet
// changes.
static const unsigned int kStreamsSerializationFormatVersion = 20090218;
//
// This is a cut down Varint implementation, implementing only what we use for
// streams.
//
class Varint {
 public:
  // Maximum number of bytes in the varint encoding of a uint32_t.
  static const int kMax32 = 5;

  // Parses a Varint32 encoded value from |source| and stores it in |output|,
  // and returns a pointer to the following byte. Returns nullptr, leaving
  // |output| untouched, if a valid varint value was not found before |limit|
  // or if the encoded value does not fit in 32 bits.
  static const uint8_t* Parse32WithLimit(const uint8_t* source,
                                         const uint8_t* limit,
                                         uint32_t* output);

  // Writes the Varint32 encoded representation of |value| to buffer
  // |destination|. |destination| must have sufficient length to hold kMax32
  // bytes. Returns a pointer to the byte just past the last encoded byte.
  static uint8_t* Encode32(uint8_t* destination, uint32_t value);
};

// Parses a Varint32 encoded unsigned number from |source|. The Varint32
// encoding is a little-endian sequence of bytes containing base-128 digits,
// with the high order bit set to indicate if there are more digits.
//
// For each byte, we mask out the digit and 'or' it into the right place in the
// result.
//
// The digit loop is unrolled for performance. It usually exits after the first
// one or two digits.
const uint8_t* Varint::Parse32WithLimit(const uint8_t* source,
                                        const uint8_t* limit,
                                        uint32_t* output) {
  uint32_t digit, result;

  // Digit 0: bits 0..6.
  if (source >= limit)
    return nullptr;
  digit = *(source++);
  result = digit & 127;
  if (digit < 128) {
    *output = result;
    return source;
  }

  // Digit 1: bits 7..13.
  if (source >= limit)
    return nullptr;
  digit = *(source++);
  result |= (digit & 127) << 7;
  if (digit < 128) {
    *output = result;
    return source;
  }

  // Digit 2: bits 14..20.
  if (source >= limit)
    return nullptr;
  digit = *(source++);
  result |= (digit & 127) << 14;
  if (digit < 128) {
    *output = result;
    return source;
  }

  // Digit 3: bits 21..27.
  if (source >= limit)
    return nullptr;
  digit = *(source++);
  result |= (digit & 127) << 21;
  if (digit < 128) {
    *output = result;
    return source;
  }

  // Digit 4: bits 28..31. Only four payload bits are left in a uint32_t, so
  // the final digit must be in [0, 15]. Anything larger either has the
  // continuation bit set (the value is too long to be a Varint32) or carries
  // payload bits that would be shifted out and silently lost.
  if (source >= limit)
    return nullptr;
  digit = *(source++);
  if (digit > 15)
    return nullptr;
  *output = result | (digit << 28);
  return source;
}

// Write the base-128 digits in little-endian order. All except the last digit
// have the high bit set to indicate more digits.
inline uint8_t* Varint::Encode32(uint8_t* destination, uint32_t value) {
  while (value >= 128) {
    // The cast keeps the low eight bits; OR-ing in 128 sets the continuation
    // flag on top of the low seven payload bits.
    *(destination++) = static_cast<uint8_t>(value) | 128;
    value = value >> 7;
  }
  *(destination++) = static_cast<uint8_t>(value);
  return destination;
}
void SourceStream::Init(const SinkStream& sink) {
Init(sink.Buffer(), sink.Length());
}
bool SourceStream::Read(void* destination, size_t count) {
if (current_ + count > end_)
return false;
memcpy(destination, current_, count);
current_ += count;
return true;
}
// Reads one Varint32 from the current position into |output_value|. On success
// the stream advances past the encoded bytes; on failure the position is left
// unchanged and false is returned.
bool SourceStream::ReadVarint32(uint32_t* output_value) {
  if (const uint8_t* next =
          Varint::Parse32WithLimit(current_, end_, output_value)) {
    current_ = next;
    return true;
  }
  return false;
}
// Reads a signed value stored as an unsigned Varint32. Signed numbers are
// encoded so that numbers nearer zero have a shorter varint encoding:
//   0000xxxx encoded as 000xxxx0.
//   1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.
// Returns false if no varint could be read.
bool SourceStream::ReadVarint32Signed(int32_t* output_value) {
  uint32_t encoded;
  if (!ReadVarint32(&encoded))
    return false;

  // The lowest bit is the sign flag; the remaining bits hold the payload.
  const bool is_negative = (encoded & 1) != 0;
  const uint32_t payload = encoded >> 1;
  *output_value = is_negative ? ~static_cast<int32_t>(payload)
                              : static_cast<int32_t>(payload);
  return true;
}
// Initializes |substream| to cover |length| bytes starting |offset| bytes past
// the current position, without advancing this stream. Returns false if that
// range does not lie within the remaining bytes.
bool SourceStream::ShareSubstream(size_t offset,
                                  size_t length,
                                  SourceStream* substream) {
  const size_t available = Remaining();
  // Written as a subtraction so that |offset + length| cannot overflow.
  if (offset > available || length > available - offset)
    return false;
  substream->Init(current_ + offset, length);
  return true;
}
// Initializes |substream| to cover the next |length| bytes and then advances
// this stream past them. Returns false, without advancing, if fewer than
// |length| bytes remain.
bool SourceStream::ReadSubstream(size_t length, SourceStream* substream) {
  const bool shared = ShareSubstream(0, length, substream);
  if (shared)
    current_ += length;
  return shared;
}
bool SourceStream::Skip(size_t byte_count) {
if (current_ + byte_count > end_)
return false;
current_ += byte_count;
return true;
}
// Appends |byte_count| bytes starting at |data| to the stream's buffer and
// returns the result of the buffer's append operation.
CheckBool SinkStream::Write(const void* data, size_t byte_count) {
  const char* bytes = static_cast<const char*>(data);
  return buffer_.append(bytes, byte_count);
}
// Encodes |value| as a Varint32 into a scratch buffer on the stack and appends
// the encoded bytes to the stream.
CheckBool SinkStream::WriteVarint32(uint32_t value) {
  uint8_t encoded[Varint::kMax32];
  const uint8_t* const encoded_end = Varint::Encode32(encoded, value);
  const size_t encoded_size = static_cast<size_t>(encoded_end - encoded);
  return Write(encoded, encoded_size);
}
// Writes |value| as an unsigned Varint32, encoding signed numbers so that
// numbers nearer zero have a shorter varint encoding:
//   0000xxxx encoded as 000xxxx0.
//   1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.
CheckBool SinkStream::WriteVarint32Signed(int32_t value) {
  // The shift is done on uint32_t. Doubling in int32_t overflows (undefined
  // behaviour) once the magnitude exceeds 2^30, whereas the unsigned result
  // always fits: both |value| (when non-negative) and |~value| (when negative)
  // lie in [0, INT32_MAX].
  uint32_t encoded;
  if (value < 0)
    encoded = (static_cast<uint32_t>(~value) << 1) | 1u;
  else
    encoded = static_cast<uint32_t>(value) << 1;
  return WriteVarint32(encoded);
}
// Writes a size_t |value| as a Varint32. The value is narrowed to 32 bits and
// LOG_ASSERT checks that the narrowing did not change it.
CheckBool SinkStream::WriteSizeVarint32(size_t value) {
uint32_t narrowed_value = static_cast<uint32_t>(value);
// Where size_t is itself 32 bits wide the two values are always equal, so the
// compiler should be able to work out that this assertion can never fire.
LOG_ASSERT(value == narrowed_value);
return WriteVarint32(narrowed_value);
}
// Appends the contents of |other| to this stream. On success |other| is
// retired (its buffer is cleared); on failure |other| is left untouched.
CheckBool SinkStream::Append(SinkStream* other) {
  if (!Write(other->buffer_.data(), other->buffer_.size()))
    return false;
  other->Retire();
  return true;
}
// Clears the stream's internal buffer.
void SinkStream::Retire() {
  buffer_.clear();
}
////////////////////////////////////////////////////////////////////////////////
// |count_| starts at kMaxStreams; a successful Init() replaces it with the
// stream count read from the serialized header.
SourceStreamSet::SourceStreamSet() : count_(kMaxStreams) {}

SourceStreamSet::~SourceStreamSet() = default;
// Initializes from |source|.
// The stream set for N streams is serialized as a header
//   <version><N><length1><length2>...<lengthN>
// followed by the stream contents
//   <bytes1><bytes2>...<bytesN>
//
// Returns false if the header cannot be parsed, the version does not match
// kStreamsSerializationFormatVersion, N exceeds kMaxStreams, or the lengths do
// not add up to exactly the number of bytes that follow the header.
bool SourceStreamSet::Init(const void* source, size_t byte_count) {
  const uint8_t* start = static_cast<const uint8_t*>(source);
  const uint8_t* end = start + byte_count;

  uint32_t version;
  const uint8_t* finger = Varint::Parse32WithLimit(start, end, &version);
  if (finger == nullptr)
    return false;
  if (version != kStreamsSerializationFormatVersion)
    return false;

  uint32_t count;
  finger = Varint::Parse32WithLimit(finger, end, &count);
  if (finger == nullptr)
    return false;
  if (count > kMaxStreams)
    return false;
  count_ = count;

  uint32_t lengths[kMaxStreams];
  size_t accumulated_length = 0;
  for (size_t i = 0; i < count_; ++i) {
    finger = Varint::Parse32WithLimit(finger, end, &lengths[i]);
    if (finger == nullptr)
      return false;
    // Reject as soon as the declared lengths exceed the bytes still available.
    // Such input could never pass the exact-size check below, and bailing out
    // here keeps |accumulated_length| from wrapping around where size_t is
    // only 32 bits wide.
    const size_t remaining = static_cast<size_t>(end - finger);
    if (accumulated_length > remaining ||
        lengths[i] > remaining - accumulated_length) {
      return false;
    }
    accumulated_length += lengths[i];
  }

  // Remaining bytes should add up to sum of lengths.
  if (static_cast<size_t>(end - finger) != accumulated_length)
    return false;

  // Hand each stream its slice of the contents, in order.
  const uint8_t* contents = finger;
  for (size_t i = 0; i < count_; ++i) {
    stream(i)->Init(contents, lengths[i]);
    contents += lengths[i];
  }
  return true;
}
// Initializes from the unread bytes of |source|. |source| itself is not
// advanced here.
bool SourceStreamSet::Init(SourceStream* source) {
  // TODO(sra): consume the rest of |source|.
  const size_t byte_count = source->Remaining();
  return Init(source->Buffer(), byte_count);
}
// Reads a nested stream set into |set|. Stream 0 acts as the control stream:
// it holds the number of streams followed by one length per stream. Stream i
// of |set| is then initialized as a substream covering the next lengths[i]
// bytes of stream i of |this|. Returns false on malformed or truncated input.
bool SourceStreamSet::ReadSet(SourceStreamSet* set) {
  uint32_t stream_count = 0;
  SourceStream* control_stream = this->stream(0);
  if (!control_stream->ReadVarint32(&stream_count))
    return false;

  // |stream_count| comes straight from the input. Without this bound a large
  // value would write past the end of |lengths| below.
  if (stream_count > kMaxStreams)
    return false;

  uint32_t lengths[kMaxStreams] = {};  // i.e. all zero.

  for (size_t i = 0; i < stream_count; ++i) {
    if (!control_stream->ReadVarint32(&lengths[i]))
      return false;
  }

  for (size_t i = 0; i < stream_count; ++i) {
    if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i)))
      return false;
  }
  return true;
}
// Returns true when none of the first |count_| streams has bytes remaining.
bool SourceStreamSet::Empty() const {
  size_t index = 0;
  // Advance past every exhausted stream; stop at the first one that still has
  // unread bytes.
  while (index < count_ && streams_[index].Remaining() == 0)
    ++index;
  return index == count_;
}
////////////////////////////////////////////////////////////////////////////////
// |count_| starts at kMaxStreams; Init() may replace it with another limit.
SinkStreamSet::SinkStreamSet() : count_(kMaxStreams) {}

SinkStreamSet::~SinkStreamSet() = default;
void SinkStreamSet::Init(size_t stream_index_limit) {
count_ = stream_index_limit;
}
// Writes the header for this stream set to |header|. For N streams the header
// is serialized as
//   <version><N><length1><length2>...<lengthN>
// Stops at, and reports, the first write that fails.
CheckBool SinkStreamSet::CopyHeaderTo(SinkStream* header) {
  if (!header->WriteVarint32(kStreamsSerializationFormatVersion))
    return false;
  if (!header->WriteSizeVarint32(count_))
    return false;
  for (size_t i = 0; i < count_; ++i) {
    if (!header->WriteSizeVarint32(stream(i)->Length()))
      return false;
  }
  return true;
}
// Writes |this| to |combined_stream|: first the header, then the contents of
// each stream in order. See SourceStreamSet::Init for the layout of the stream
// metadata and contents. Each stream is appended via SinkStream::Append, which
// retires it once its bytes have been copied.
CheckBool SinkStreamSet::CopyTo(SinkStream* combined_stream) {
  SinkStream header;
  if (!CopyHeaderTo(&header))
    return false;

  // Reserve the correct amount of storage up front: header plus all streams.
  size_t total_length = header.Length();
  for (size_t i = 0; i < count_; ++i)
    total_length += stream(i)->Length();
  if (!combined_stream->Reserve(total_length))
    return false;

  if (!combined_stream->Append(&header))
    return false;
  for (size_t i = 0; i < count_; ++i) {
    if (!combined_stream->Append(stream(i)))
      return false;
  }
  return true;
}
// Writes the nested stream set |set| into |this|. The stream count and the
// length of each stream go to the control stream (stream 0); the contents of
// stream i of |set| are then appended to stream i of |this|, which retires the
// source stream. SourceStreamSet::ReadSet reads this layout back.
CheckBool SinkStreamSet::WriteSet(SinkStreamSet* set) {
  // Lengths are kept as size_t so that WriteSizeVarint32() sees the real value
  // and its narrowing assertion can fire. Truncating to 32 bits here would
  // silently record a wrong length for an oversized stream.
  size_t lengths[kMaxStreams];

  // 'stream_count' includes all non-empty streams and all empty stream numbered
  // lower than a non-empty stream.
  size_t stream_count = 0;
  for (size_t i = 0; i < kMaxStreams; ++i) {
    lengths[i] = set->stream(i)->Length();
    if (lengths[i] > 0)
      stream_count = i + 1;
  }

  SinkStream* control_stream = this->stream(0);
  bool ret = control_stream->WriteSizeVarint32(stream_count);
  for (size_t i = 0; ret && i < stream_count; ++i) {
    ret = control_stream->WriteSizeVarint32(lengths[i]);
  }
  for (size_t i = 0; ret && i < stream_count; ++i) {
    ret = this->stream(i)->Append(set->stream(i));
  }
  return ret;
}
}  // namespace courgette