blob: 8d3f816fd76e8c0eac13b2a478dda294e9cafcb4 [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Rice-Golomb decoder for blacklist updates.
// Details at: https://en.wikipedia.org/wiki/Golomb_coding
#ifndef COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_
#define COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_
#include <ostream>
#include <string>
#include <vector>
#include "base/gtest_prod_util.h"
#include "third_party/protobuf/src/google/protobuf/repeated_field.h"
namespace safe_browsing {
// Enumerate different failure events while decoding the Rice-encoded string
// sent by the server for histogramming purposes. DO NOT CHANGE THE ORDERING OF
// THESE VALUES.
enum V4DecodeResult {
// No error.
DECODE_SUCCESS = 0,
// Exceeded the number of entries to expect.
DECODE_NO_MORE_ENTRIES_FAILURE = 1,
// Requested to decode >32 bits.
DECODE_REQUESTED_TOO_MANY_BITS_FAILURE = 2,
// All bits had already been read and interpreted in the encoded string.
DECODE_RAN_OUT_OF_BITS_FAILURE = 3,
// The num_entries argument to DecodePrefixes or DecodeIntegers was negative.
NUM_ENTRIES_NEGATIVE_FAILURE = 4,
// Rice-encoding parameter was non-positive when the number of encoded entries
// was > 0.
RICE_PARAMETER_NON_POSITIVE_FAILURE = 5,
// |encoded_data| was empty when the number of encoded entries was > 0.
ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE = 6,
// decoded value had an integer overflow, which is unexpected.
DECODED_INTEGER_OVERFLOW_FAILURE = 7,
// Memory space for histograms is determined by the max. ALWAYS
// ADD NEW VALUES BEFORE THIS ONE.
DECODE_RESULT_MAX
};
class V4RiceDecoder {
public:
// Decodes the Rice-encoded string in |encoded_data| as a list of integers
// and stores them in |out|. |rice_parameter| is the exponent of 2 for
// calculating 'M', |first_value| is the first value in the output sequence,
// |num_entries| is the number of subsequent encoded entries. Each decoded
// value is a positive offset from the previous value.
// So, for instance, if the unencoded sequence is: [3, 7, 25], then
// |first_value| = 3, |num_entries| = 2 and decoding the |encoded_data| will
// produce the offsets: [4, 18].
static V4DecodeResult DecodeIntegers(
const ::google::protobuf::int64 first_value,
const ::google::protobuf::int32 rice_parameter,
const ::google::protobuf::int32 num_entries,
const std::string& encoded_data,
::google::protobuf::RepeatedField<::google::protobuf::int32>* out);
// Decodes the Rice-encoded string in |encoded_data| as a string of 4-byte
// hash prefixes and stores them in |out|. The rest of the arguments are the
// same as for |DecodeIntegers|.
// Important: |out| is only meant to be used as a concatenated list of sorted
// 4-byte hash prefixes, not as a vector of uint32_t values.
// This method does the following:
// 1. Rice-decode the |encoded_data| as a list of uint32_t values.
// 2. Flip the endianness (on little-endian machines) of each of these
// values. This is done because when a hash prefix is represented as a
// uint32_t, the bytes get reordered. This generates the hash prefix that
// the server would have sent in the absence of Rice-encoding.
// 3. Sort the resulting list of uint32_t values.
// 4. Flip the endianness once again since the uint32_t are expected to be
// consumed as a concatenated list of 4-byte hash prefixes, when merging
// the
// update with the existing state.
static V4DecodeResult DecodePrefixes(
const ::google::protobuf::int64 first_value,
const ::google::protobuf::int32 rice_parameter,
const ::google::protobuf::int32 num_entries,
const std::string& encoded_data,
std::vector<uint32_t>* out);
virtual ~V4RiceDecoder();
std::string DebugString() const;
private:
FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextWordWithNoData);
FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextBitsWithNoData);
FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoData);
FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoEntries);
friend class V4RiceTest;
// Validate some of the parameters passed to the decode methods.
static V4DecodeResult ValidateInput(
const ::google::protobuf::int32 rice_parameter,
const ::google::protobuf::int32 num_entries,
const std::string& encoded_data);
// The |rice_parameter| is the exponent of 2 for calculating 'M',
// |num_entries| is the number of encoded entries in the |encoded_data| and
// |encoded_data| is the Rice-encoded string to decode.
V4RiceDecoder(const ::google::protobuf::int32 rice_parameter,
const ::google::protobuf::int32 num_entries,
const std::string& encoded_data);
// Returns true until |num_entries| entries have been decoded.
bool HasAnotherValue() const;
// Populates |value| with the next 32-bit unsigned integer decoded from
// |encoded_data|.
V4DecodeResult GetNextValue(uint32_t* value);
// Reads in up to 32 bits from |encoded_data| into |word|, from which
// subsequent GetNextBits() calls read bits.
V4DecodeResult GetNextWord(uint32_t* word);
// Reads |num_requested_bits| into |x| from |current_word_| and advances it
// if needed by calling GetNextWord().
V4DecodeResult GetNextBits(unsigned int num_requested_bits, uint32_t* x);
// Reads |num_requested_bits| from |current_word_|.
uint32_t GetBitsFromCurrentWord(unsigned int num_requested_bits);
// The Rice parameter, which is the exponent of two for calculating 'M'. 'M'
// is used as the base to calculate the quotient and remainder in the
// algorithm.
const unsigned int rice_parameter_;
// The number of entries encoded in the data stream.
::google::protobuf::int32 num_entries_;
// The Rice-encoded string.
const std::string data_;
// Represents how many total bytes have we read from |data_| into
// |current_word_|.
unsigned int data_byte_index_;
// Represents the number of bits that we have read from |current_word_|. When
// this becomes 32, which is the size of |current_word_|, a new
// |current_word_| needs to be read from |data_|.
unsigned int current_word_bit_index_;
// The 32-bit value read from |data_|. All bit reading operations operate on
// |current_word_|.
uint32_t current_word_;
};
std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder);
} // namespace safe_browsing
#endif // COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_