blob: 8ccdafdcec55edb43a9e26ea6a68f465a1fb5c5e [file] [log] [blame]
/*
* Copyright 2017 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Translate a binary stream of bytes into a valid wasm module, *somehow*.
// This is helpful for fuzzing.
//
#include "ir/branch-utils.h"
#include "ir/struct-utils.h"
#include "support/insert_ordered.h"
#include "tools/fuzzing/random.h"
#include <ir/eh-utils.h>
#include <ir/find_all.h>
#include <ir/literal-utils.h>
#include <ir/manipulation.h>
#include <ir/names.h>
#include <ir/public-type-validator.h>
#include <ir/utils.h>
#include <support/file.h>
#include <tools/optimization-options.h>
#include <wasm-builder.h>
namespace wasm {
// helper structs, since list initialization has a fixed order of
// evaluation, avoiding UB
struct ThreeArgs {
Expression* a;
Expression* b;
Expression* c;
};
struct UnaryArgs {
UnaryOp a;
Expression* b;
};
struct BinaryArgs {
BinaryOp a;
Expression* b;
Expression* c;
};
// params
struct FuzzParams {
// The maximum amount of params to each function.
int MAX_PARAMS;
// The maximum amount of vars in each function.
int MAX_VARS;
// The maximum number of globals in a module.
int MAX_GLOBALS;
// The maximum number of tuple elements.
int MAX_TUPLE_SIZE;
// The maximum number of struct fields.
int MAX_STRUCT_SIZE;
// The maximum number of elements in an array.
int MAX_ARRAY_SIZE;
// The number of nontrivial heap types to generate.
int MIN_HEAPTYPES;
int MAX_HEAPTYPES;
// some things require luck, try them a few times
int TRIES;
// beyond a nesting limit, greatly decrease the chance to continue to nest
int NESTING_LIMIT;
// the maximum size of a block
int BLOCK_FACTOR;
// the memory that we use, a small portion so that we have a good chance of
// looking at writes (we also look outside of this region with small
// probability) this should be a power of 2
Address USABLE_MEMORY;
// the number of runtime iterations (function calls, loop backbranches) we
// allow before we stop execution with a trap, to prevent hangs. 0 means
// no hang protection.
int HANG_LIMIT;
// the maximum amount of new GC types (structs, etc.) to create
int MAX_NEW_GC_TYPES;
// the maximum amount of catches in each try (not including a catch-all, if
// present).
int MAX_TRY_CATCHES;
FuzzParams() { setDefaults(); }
void setDefaults();
};
// main reader
class TranslateToFuzzReader {
static constexpr size_t VeryImportant = 4;
static constexpr size_t Important = 2;
public:
TranslateToFuzzReader(Module& wasm,
std::vector<char>&& input,
bool closedWorld = false);
TranslateToFuzzReader(Module& wasm,
std::string& filename,
bool closedWorld = false);
void pickPasses(OptimizationOptions& options);
void setAllowMemory(bool allowMemory_) { allowMemory = allowMemory_; }
void setAllowOOB(bool allowOOB_) { allowOOB = allowOOB_; }
void setPreserveImportsAndExports(bool preserveImportsAndExports_) {
preserveImportsAndExports = preserveImportsAndExports_;
}
void setImportedModule(std::string importedModuleName);
void build();
Module& wasm;
private:
// Whether the module will be tested in a closed-world environment.
bool closedWorld;
Builder builder;
Random random;
// Whether to emit memory operations like loads and stores.
bool allowMemory = true;
// Whether to emit loads, stores, and call_indirects that may be out
// of bounds (which traps in wasm, and is undefined behavior in C).
bool allowOOB = true;
// Whether we preserve imports and exports. Normally we add imports (for
// logging and other useful functionality for testing), and add exports of
// functions as we create them. With this set, we add neither imports nor
// exports, which is useful if the tool using us only wants us to mutate an
// existing testcase (using initial-content).
bool preserveImportsAndExports = false;
// An optional module to import from.
std::optional<Module> importedModule;
// Whether we allow the fuzzer to add unreachable code when generating changes
// to existing code. This is randomized during startup, but could be an option
// like the above options eventually if we find that useful.
bool allowAddingUnreachableCode;
// Whether to emit atomic waits (which in single-threaded mode, may hang...)
static const bool ATOMIC_WAITS = false;
// The chance to emit a logging operation for a none expression. We
// randomize this in each function.
unsigned LOGGING_PERCENT = 0;
Name HANG_LIMIT_GLOBAL;
Name funcrefTableName;
Name exnrefTableName;
std::unordered_map<Type, Name> logImportNames;
Name hashMemoryName;
Name throwImportName;
Name tableGetImportName;
Name tableSetImportName;
Name callExportImportName;
Name callExportCatchImportName;
Name callRefImportName;
Name callRefCatchImportName;
Name sleepImportName;
std::unordered_map<Type, std::vector<Name>> globalsByType;
std::unordered_map<Type, std::vector<Name>> mutableGlobalsByType;
std::unordered_map<Type, std::vector<Name>> immutableGlobalsByType;
std::unordered_map<Type, std::vector<Name>> importedImmutableGlobalsByType;
std::vector<Type> loggableTypes;
// The heap types we can pick from to generate instructions.
std::vector<HeapType> interestingHeapTypes;
// A mapping of a heap type to the subset of interestingHeapTypes that are
// subtypes of it.
std::unordered_map<HeapType, std::vector<HeapType>> interestingHeapSubTypes;
// Type => list of struct fields that have that type.
std::unordered_map<Type, std::vector<StructField>> typeStructFields;
// Type => list of array types that have that type.
std::unordered_map<Type, std::vector<HeapType>> typeArrays;
// All struct fields that are mutable.
std::vector<StructField> mutableStructFields;
// All arrays that are mutable.
std::vector<HeapType> mutableArrays;
// All tags that are valid as exception tags (which cannot have results).
std::vector<Tag*> exceptionTags;
Index numAddedFunctions = 0;
// The name of an empty tag.
Name trivialTag;
// Whether we were given initial functions.
bool haveInitialFunctions;
// RAII helper for managing the state used to create a single function.
struct FunctionCreationContext {
TranslateToFuzzReader& parent;
Function* func;
std::vector<Expression*> breakableStack; // things we can break to
Index labelIndex = 0;
// a list of things relevant to computing the odds of an infinite loop,
// which we try to minimize the risk of
std::vector<Expression*> hangStack;
// type => list of locals with that type
std::unordered_map<Type, std::vector<Index>> typeLocals;
FunctionCreationContext(TranslateToFuzzReader& parent, Function* func);
~FunctionCreationContext();
// Fill in the typeLocals data structure.
void computeTypeLocals() {
typeLocals.clear();
for (Index i = 0; i < func->getNumLocals(); i++) {
typeLocals[func->getLocalType(i)].push_back(i);
}
}
};
FunctionCreationContext* funcContext = nullptr;
// The fuzzing parameters we use. This may change from function to function or
// even in a more refined manner, so we use an RAII context to manage it.
struct FuzzParamsContext : public FuzzParams {
TranslateToFuzzReader& parent;
FuzzParamsContext* old;
FuzzParamsContext(TranslateToFuzzReader& parent)
: parent(parent), old(parent.fuzzParams) {
parent.fuzzParams = this;
}
~FuzzParamsContext() { parent.fuzzParams = old; }
};
FuzzParamsContext* fuzzParams = nullptr;
// The default global context we use throughout the process (unless it is
// overridden using another context in an RAII manner).
std::unique_ptr<FuzzParamsContext> globalParams;
public:
int nesting = 0;
struct AutoNester {
TranslateToFuzzReader& parent;
size_t amount = 1;
AutoNester(TranslateToFuzzReader& parent) : parent(parent) {
parent.nesting++;
}
~AutoNester() { parent.nesting -= amount; }
// Add more nesting manually.
void add(size_t more) {
parent.nesting += more;
amount += more;
}
};
private:
// Generating random data is common enough that it's worth having helpers that
// forward to `random`.
int8_t get() { return random.get(); }
int16_t get16() { return random.get16(); }
int32_t get32() { return random.get32(); }
int64_t get64() { return random.get64(); }
float getFloat() { return random.getFloat(); }
double getDouble() { return random.getDouble(); }
Index upTo(Index x) { return random.upTo(x); }
bool oneIn(Index x) { return random.oneIn(x); }
Index upToSquared(Index x) { return random.upToSquared(x); }
// Pick from a vector-like container or a fixed list.
template<typename T> const typename T::value_type& pick(const T& vec) {
return random.pick(vec);
}
template<typename T, typename... Args> T pick(T first, Args... args) {
return random.pick(first, args...);
}
// Pick from options associated with features.
template<typename T> using FeatureOptions = Random::FeatureOptions<T>;
template<typename T> const T pick(FeatureOptions<T>& picker) {
return random.pick(picker);
}
// Setup methods
void setupMemory();
void setupHeapTypes();
void setupTables();
void setupGlobals();
void setupTags();
void addTag();
void finalizeMemory();
void finalizeTable();
void shuffleExports();
void prepareHangLimitSupport();
void addHangLimitSupport();
void addImportLoggingSupport();
void addImportCallingSupport();
void addImportThrowingSupport();
void addImportTableSupport();
void addImportSleepSupport();
void addHashMemorySupport();
// Special expression makers
Expression* makeHangLimitCheck();
Expression* makeImportLogging();
Expression* makeImportThrowing(Type type);
Expression* makeImportTableGet();
Expression* makeImportTableSet(Type type);
// Call either an export or a ref. We do this from a single function to better
// control the frequency of each.
Expression* makeImportCallCode(Type type);
Expression* makeImportSleep(Type type);
Expression* makeMemoryHashLogging();
// We must be careful not to add exports that have invalid public types, such
// as those that reach exact types when custom descriptors is disabled.
PublicTypeValidator publicTypeValidator;
bool isValidPublicType(Type type) {
return publicTypeValidator.isValidPublicType(type);
}
// Function operations. The main processFunctions() loop will call addFunction
// as well as modFunction().
void processFunctions();
// Add a new function.
Function* addFunction();
// Modify an existing function.
void modFunction(Function* func);
void addHangLimitChecks(Function* func);
void useImportedFunctions();
void useImportedGlobals();
// Recombination and mutation
// Recombination and mutation can replace a node with another node of the same
// type, but should not do so for certain types that are dangerous. For
// example, it would be bad to add a non-nullable reference to a tuple, as
// that would force us to use temporary locals for the tuple, but non-nullable
// references cannot always be stored in locals. Also, the 'pop' pseudo
// instruction for EH is supposed to exist only at the beginning of a 'catch'
// block, so it shouldn't be moved around or deleted freely.
bool canBeArbitrarilyReplaced(Expression* curr) {
// TODO: Remove this once we better support exact references.
if (curr->type.isExact()) {
return false;
}
return curr->type.isDefaultable() &&
!EHUtils::containsValidDanglingPop(curr);
}
void recombine(Function* func);
void mutate(Function* func);
// Fix up the IR after recombination and mutation.
void fixAfterChanges(Function* func);
void modifyInitialFunctions();
// Note a global for use during code generation.
void useGlobalLater(Global* global);
// Initial wasm contents may have come from a test that uses the drop pattern:
//
// (drop ..something interesting..)
//
// The dropped interesting thing is optimized to some other interesting thing
// by a pass, and we verify it is the expected one. But this does not use the
// value in a way the fuzzer can notice. Replace some drops with a logging
// operation instead.
void dropToLog(Function* func);
// the fuzzer external interface sends in zeros (simpler to compare
// across invocations from JS or wasm-opt etc.). Add invocations in
// the wasm, so they run everywhere
void addInvocations(Function* func);
Name makeLabel() {
return std::string("label$") + std::to_string(funcContext->labelIndex++);
}
// Expression making methods. Always call the toplevel make(type) command, not
// the specific ones.
Expression* make(Type type);
Expression* _makeConcrete(Type type);
Expression* _makenone();
Expression* _makeunreachable();
// Make something with no chance of infinite recursion.
Expression* makeTrivial(Type type);
// We must note when we are nested in a makeTrivial() call. When we are, all
// operations must try to be as trivial as possible.
int trivialNesting = 0;
// Specific expression creators
Expression* makeBlock(Type type);
Expression* makeLoop(Type type);
Expression* makeCondition();
// Make something, with a good chance of it being a block
Expression* makeMaybeBlock(Type type);
Expression* buildIf(const struct ThreeArgs& args, Type type);
Expression* makeIf(Type type);
Expression* makeTry(Type type);
Expression* makeTryTable(Type type);
Expression* makeBreak(Type type);
Expression* makeCall(Type type);
Expression* makeCallIndirect(Type type);
Expression* makeCallRef(Type type);
Expression* makeLocalGet(Type type);
Expression* makeLocalSet(Type type);
Expression* makeGlobalGet(Type type);
Expression* makeGlobalSet(Type type);
Expression* makeTupleMake(Type type);
Expression* makeTupleExtract(Type type);
Expression* makePointer();
Expression* makeNonAtomicLoad(Type type);
Expression* makeLoad(Type type);
Expression* makeNonAtomicStore(Type type);
Expression* makeStore(Type type);
// Makes a small change to a constant value.
Literal tweak(Literal value);
Literal makeLiteral(Type type);
Expression* makeRefFuncConst(Type type);
// Emit a constant expression for a given type, as best we can. We may not be
// able to emit a literal Const, like say if the type is a function reference
// then we may emit a RefFunc, but also we may have other requirements, like
// we may add a GC cast to fixup the type.
Expression* makeConst(Type type);
// Generate reference values. One function handles basic types, and the other
// compound ones.
Expression* makeBasicRef(Type type);
Expression* makeCompoundRef(Type type);
Expression* makeStringConst();
Expression* makeStringNewArray();
Expression* makeStringNewCodePoint();
Expression* makeStringConcat();
Expression* makeStringSlice();
Expression* makeStringEq(Type type);
Expression* makeStringMeasure(Type type);
Expression* makeStringGet(Type type);
Expression* makeStringEncode(Type type);
// Similar to makeBasic/CompoundRef, but indicates that this value will be
// used in a place that will trap on null. For example, the reference of a
// struct.get or array.set would use this.
Expression* makeTrappingRefUse(HeapType type);
Expression* makeTrappingRefUse(Type type);
Expression* buildUnary(const UnaryArgs& args);
Expression* makeUnary(Type type);
Expression* buildBinary(const BinaryArgs& args);
Expression* makeBinary(Type type);
Expression* buildSelect(const ThreeArgs& args);
Expression* makeSelect(Type type);
Expression* makeSwitch(Type type);
Expression* makeDrop(Type type);
Expression* makeReturn(Type type);
Expression* makeNop(Type type);
Expression* makeUnreachable(Type type);
Expression* makeAtomic(Type type);
Expression* makeSIMD(Type type);
Expression* makeSIMDExtract(Type type);
Expression* makeSIMDReplace();
Expression* makeSIMDShuffle();
Expression* makeSIMDTernary();
Expression* makeSIMDShift();
Expression* makeSIMDLoad();
Expression* makeBulkMemory(Type type);
Expression* makeTableGet(Type type);
Expression* makeTableSet(Type type);
// TODO: support other RefIs variants, and rename this
Expression* makeRefIsNull(Type type);
Expression* makeRefEq(Type type);
Expression* makeRefTest(Type type);
Expression* makeRefCast(Type type);
Expression* makeRefGetDesc(Type type);
Expression* makeBrOn(Type type);
// Decide to emit a signed Struct/ArrayGet sometimes, when the field is
// packed.
bool maybeSignedGet(const Field& field);
Expression* makeStructGet(Type type);
Expression* makeStructSet(Type type);
Expression* makeArrayGet(Type type);
Expression* makeArraySet(Type type);
// Use a single method for the misc array operations, to not give them too
// much representation (e.g. compared to struct operations, which only include
// get/set).
Expression* makeArrayBulkMemoryOp(Type type);
Expression* makeI31Get(Type type);
Expression* makeThrow(Type type);
Expression* makeThrowRef(Type type);
Expression* makeMemoryInit();
Expression* makeDataDrop();
Expression* makeMemoryCopy();
Expression* makeMemoryFill();
// Getters for Types
Type getSingleConcreteType();
Type getReferenceType();
Type getCastableReferenceType();
Type getEqReferenceType();
Type getMVPType();
Type getTupleType();
Type getConcreteType();
Type getControlFlowType();
Type getStorableType();
Type getLoggableType();
bool isLoggableType(Type type);
Nullability getNullability();
Exactness getExactness();
Nullability getSubType(Nullability nullability);
Exactness getSubType(Exactness exactness);
HeapType getSubType(HeapType type);
Type getSubType(Type type);
Nullability getSuperType(Nullability nullability);
HeapType getSuperType(HeapType type);
Type getSuperType(Type type);
// Utilities
Name getTargetName(Expression* target);
Type getTargetType(Expression* target);
// statistical distributions
// 0 to the limit, logarithmic scale
Index logify(Index x) {
return std::floor(std::log(std::max(Index(1) + x, Index(1))));
}
};
} // namespace wasm