blob: 33d1c008c6bfcbeb5bb154ca83449b8dce59014f [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_TEST_FUZZED_DATA_PROVIDER_H_
#define BASE_TEST_FUZZED_DATA_PROVIDER_H_
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <cstring>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
// A single-header library providing an utility class to break up an array of
// bytes (supposedly provided by a fuzzing engine) for multiple consumers.
// Whenever run on the same input, provides the same output, as long as its
// methods are called in the same order, with the same arguments.
// TODO(mmoroz): move this out of //base as it should be an independent library.
namespace base {
class FuzzedDataProvider {
public:
typedef uint8_t data_type;
// |data| is an array of length |size| that the FuzzedDataProvider wraps to
// provide more granular access. |data| must outlive the FuzzedDataProvider.
FuzzedDataProvider(const uint8_t* data, size_t size)
: data_ptr_(data), remaining_bytes_(size) {}
~FuzzedDataProvider() = default;
// Returns a std::vector containing |num_bytes| of input data. If fewer than
// |num_bytes| of data remain, returns a shorter std::vector containing all
// of the data that's left.
template <typename T = data_type>
std::vector<T> ConsumeBytes(size_t num_bytes) {
static_assert(sizeof(T) == sizeof(data_type), "Incompatible data type.");
num_bytes = std::min(num_bytes, remaining_bytes_);
// The point of using the size-based constructor below is to increase the
// odds of having a vector object with capacity being equal to the length.
// That part is always implementation specific, but at least both libc++ and
// libstdc++ allocate the requested number of bytes in that constructor,
// which seems to be a natual choice for other implementations as well.
// To increase the odds even more, we also call |shrink_to_fit| below.
std::vector<T> result(num_bytes);
std::memcpy(result.data(), data_ptr_, num_bytes);
Advance(num_bytes);
// Even though |shrink_to_fit| is also implementation specific, we expect it
// to provide an additional assurance in case vector's constructor allocated
// a buffer which is larger than the actual amount of data we put inside it.
result.shrink_to_fit();
return result;
}
// Prefer using |ConsumeBytes| unless you actually need a std::string object.
// Returns a std::string containing |num_bytes| of input data. If fewer than
// |num_bytes| of data remain, returns a shorter std::string containing all
// of the data that's left.
std::string ConsumeBytesAsString(size_t num_bytes) {
static_assert(sizeof(std::string::value_type) == sizeof(data_type),
"ConsumeBytesAsString cannot convert the data to a string.");
num_bytes = std::min(num_bytes, remaining_bytes_);
std::string result(
reinterpret_cast<const std::string::value_type*>(data_ptr_), num_bytes);
Advance(num_bytes);
return result;
}
// Returns a number in the range [min, max] by consuming bytes from the input
// data. The value might not be uniformly distributed in the given range. If
// there's no input data left, always returns |min|. |min| must be less than
// or equal to |max|.
template <typename T>
T ConsumeIntegralInRange(T min, T max) {
static_assert(std::is_integral<T>::value, "An integral type is required.");
static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
if (min > max)
abort();
// Use the biggest type possible to hold the range and the result.
uint64_t range = static_cast<uint64_t>(max) - min;
uint64_t result = 0;
size_t offset = 0;
while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
remaining_bytes_ != 0) {
// Pull bytes off the end of the seed data. Experimentally, this seems to
// allow the fuzzer to more easily explore the input space. This makes
// sense, since it works by modifying inputs that caused new code to run,
// and this data is often used to encode length of data read by
// |ConsumeBytes|. Separating out read lengths makes it easier modify the
// contents of the data that is actually read.
--remaining_bytes_;
result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
offset += CHAR_BIT;
}
// Avoid division by 0, in the case |range + 1| results in overflow.
if (range != std::numeric_limits<decltype(range)>::max())
result = result % (range + 1);
return static_cast<T>(min + result);
}
// Returns a std::string of length from 0 to |max_length|. When it runs out of
// input data, returns what remains of the input. Designed to be more stable
// with respect to a fuzzer inserting characters than just picking a random
// length and then consuming that many bytes with |ConsumeBytes|.
std::string ConsumeRandomLengthString(size_t max_length) {
// Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
// followed by anything else to the end of the string. As a result of this
// logic, a fuzzer can insert characters into the string, and the string
// will be lengthened to include those new characters, resulting in a more
// stable fuzzer than picking the length of a string independently from
// picking its contents.
std::string result;
for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
char next = static_cast<char>(data_ptr_[0]);
Advance(1);
if (next == '\\' && remaining_bytes_ != 0) {
next = static_cast<char>(data_ptr_[0]);
Advance(1);
if (next != '\\')
return result;
}
result += next;
}
result.shrink_to_fit();
return result;
}
// Returns a std::vector containing all remaining bytes of the input data.
template <typename T = data_type>
std::vector<T> ConsumeRemainingBytes() {
return ConsumeBytes<T>(remaining_bytes_);
}
// Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
// object.
// Returns a std::vector containing all remaining bytes of the input data.
std::string ConsumeRemainingBytesAsString() {
return ConsumeBytesAsString(remaining_bytes_);
}
// Returns a number in the range [Type's min, Type's max]. The value might
// not be uniformly distributed in the given range. If there's no input data
// left, always returns |min|.
template <typename T>
T ConsumeIntegral() {
return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
std::numeric_limits<T>::max());
}
// Reads one byte and returns a bool, or false when no data remains.
bool ConsumeBool() { return 1 & ConsumeIntegral<uint8_t>(); }
// Returns a value from |array|, consuming as many bytes as needed to do so.
// |array| must be a fixed-size array.
template <typename T, size_t size>
T PickValueInArray(T (&array)[size]) {
return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
}
// Return an enum value. The enum must start at 0 and be contiguous. It must
// also contain kMaxValue aliased to its largest (inclusive) value. Such as:
// enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
template <typename T>
T ConsumeEnum() {
static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
return static_cast<T>(ConsumeIntegralInRange<uint32_t>(
0, static_cast<uint32_t>(T::kMaxValue)));
}
// Reports the remaining bytes available for fuzzed input.
size_t remaining_bytes() { return remaining_bytes_; }
private:
FuzzedDataProvider(const FuzzedDataProvider&) = delete;
FuzzedDataProvider& operator=(const FuzzedDataProvider&) = delete;
void Advance(size_t num_bytes) {
if (num_bytes > remaining_bytes_)
abort();
data_ptr_ += num_bytes;
remaining_bytes_ -= num_bytes;
}
const data_type* data_ptr_;
size_t remaining_bytes_;
};
} // namespace base
#endif // BASE_TEST_FUZZED_DATA_PROVIDER_H_