blob: bd527b51aecc2084530e8a5a86b9d52039ce011d [file] [log] [blame]
/*
* Copyright 2018 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef wasm_stack_h
#define wasm_stack_h
#include "ir/branch-utils.h"
#include "ir/properties.h"
#include "pass.h"
#include "support/insert_ordered.h"
#include "wasm-binary.h"
#include "wasm-traversal.h"
#include "wasm.h"
namespace wasm {
// Stack IR: an IR that represents code at the wasm binary format level,
// that is, a stack machine. Binaryen IR is *almost* identical to this,
// but as documented in README.md, there are a few differences, intended
// to make Binaryen IR fast and flexible for maximal optimization. Stack
// IR, on the other hand, is designed to optimize a few final things that
// can only really be done when modeling the stack machine format precisely.
// Currently the benefits of Stack IR are minor, less than 1% reduction in
// code size. For that reason it is just a secondary IR, run optionally
// after the main IR has been optimized. However, if we improve Stack IR
// optimizations to a point where they have a significant impact, it's
// possible that could motivate investigating replacing the main IR with Stack
// IR (so that we have just a single IR).
// A StackIR instance (see wasm.h) contains a linear sequence of
// stack instructions. This representation is very simple: just a single vector
// of all instructions, in order.
// * nullptr is allowed in the vector, representing something to skip.
// This is useful as a common thing optimizations do is remove instructions,
// so this way we can do so without compacting the vector all the time.
// Directly writing Binaryen IR to binary is fast. Alternatively,
// StackIRGenerator lets you optimize the Stack IR before emitting it to
// binary (but the cost is that the extra IR in the middle makes things 20%
// slower than emitting Binaryen IR to binary directly).
// A Stack IR instruction. Most just directly reflect a Binaryen IR node,
// but we need extra ones for certain things: control flow structures span
// several stack instructions (a begin marker, possibly intermediate markers
// such as `else` or `catch`, and an end marker).
class StackInst {
public:
  // Arena-allocation constructor; the arena reference is unused here, but the
  // signature is what the IR allocator expects.
  StackInst(MixedArena&) {}

  enum Op {
    Basic,      // an instruction directly corresponding to a non-control-flow
                // Binaryen IR node
    BlockBegin, // the beginning of a block
    BlockEnd,   // the ending of a block
    IfBegin,    // the beginning of an if
    IfElse,     // the else of an if
    IfEnd,      // the ending of an if
    LoopBegin,  // the beginning of a loop
    LoopEnd,    // the ending of a loop
    TryBegin,   // the beginning of a try
    Catch,      // the catch within a try
    CatchAll,   // the catch_all within a try
    Delegate,   // the delegate within a try
    TryEnd      // the ending of a try
  } op;

  // the expression this originates from
  Expression* origin;

  // the type - usually identical to the origin type, but e.g. wasm has no
  // unreachable blocks, they must be none
  Type type;
};
// Emits one instruction at a time to the binary output buffer. This is the
// lowest layer of the writers in this file: both the Binaryen-IR and Stack-IR
// writers funnel individual instructions through it. One visit##Class method
// is generated per IR class via wasm-delegations.def below.
class BinaryInstWriter : public OverriddenVisitor<BinaryInstWriter> {
public:
  // parent:    the module-level writer (provides type/function indexes etc.)
  // o:         the output byte buffer being appended to
  // func:      the function being written (may be null)
  // sourceMap: whether a source map is being emitted (if so, debug locations
  //            are written by the caller instead of here)
  // DWARF:     whether DWARF debug info is being emitted
  BinaryInstWriter(WasmBinaryWriter& parent,
                   BufferWithRandomAccess& o,
                   Function* func,
                   bool sourceMap,
                   bool DWARF)
    : parent(parent), o(o), func(func), sourceMap(sourceMap), DWARF(DWARF) {}

  void visit(Expression* curr) {
    // When not emitting a source map, record the binary offsets around each
    // instruction (the source-map path is handled in
    // BinaryenIRToBinaryWriter::emitDebugLocation instead).
    if (func && !sourceMap) {
      parent.writeDebugLocation(curr, func);
    }
    OverriddenVisitor<BinaryInstWriter>::visit(curr);
    if (func && !sourceMap) {
      parent.writeDebugLocationEnd(curr, func);
    }
  }

// Declare a visit method for every IR class.
#define DELEGATE(CLASS_TO_VISIT)                                               \
  void visit##CLASS_TO_VISIT(CLASS_TO_VISIT* curr);

#include "wasm-delegations.def"

  void emitResultType(Type type);
  void emitIfElse(If* curr);
  // emit the catch clause at index i of a try
  void emitCatch(Try* curr, Index i);
  void emitCatchAll(Try* curr);
  void emitDelegate(Try* curr);
  // emit an end at the end of a block/loop/if/try
  void emitScopeEnd(Expression* curr);
  // emit an end at the end of a function
  void emitFunctionEnd();
  void emitUnreachable();
  // compute the compact local layout and write the function's local
  // declarations
  void mapLocalsAndEmitHeader();

  // Binaryen IR local index => binary local index mapping, filled in by
  // mapLocalsAndEmitHeader().
  MappedLocals mappedLocals;

private:
  void emitMemoryAccess(size_t alignment, size_t bytes, uint32_t offset);
  // compute the relative branch depth for a break target name
  int32_t getBreakIndex(Name name);

  WasmBinaryWriter& parent;
  BufferWithRandomAccess& o;
  Function* func = nullptr;
  bool sourceMap;
  bool DWARF;

  // Stack of active scope names, used to resolve branch targets to depths.
  std::vector<Name> breakStack;

  // The types of locals in the compact form, in order.
  std::vector<Type> localTypes;
  // type => number of locals of that type in the compact form
  std::unordered_map<Type, size_t> numLocalsByType;

  void noteLocalType(Type type);

  // Keeps track of the binary index of the scratch locals used to lower
  // tuple.extract.
  InsertOrderedMap<Type, Index> scratchLocals;
  void countScratchLocals();
  void setScratchLocals();
};
// Takes binaryen IR and converts it to something else (binary or stack IR).
//
// This uses CRTP: SubType provides the emit* callbacks below, and this class
// drives the traversal, calling them in the order the corresponding binary
// instructions must appear. Concrete subtypes in this file are
// BinaryenIRToBinaryWriter (straight to binary) and StackIRGenerator (to
// Stack IR).
template<typename SubType>
class BinaryenIRWriter : public Visitor<BinaryenIRWriter<SubType>> {
public:
  BinaryenIRWriter(Function* func) : func(func) {}

  // Entry point: emits the header, the function body, and the function end.
  void write();

  // visits a node, emitting the proper code for it
  void visit(Expression* curr);

  void visitBlock(Block* curr);
  void visitIf(If* curr);
  void visitLoop(Loop* curr);
  void visitTry(Try* curr);

protected:
  Function* func = nullptr;

private:
  // The following simply forward to the SubType's implementations (CRTP).
  void emit(Expression* curr) { static_cast<SubType*>(this)->emit(curr); }
  void emitHeader() { static_cast<SubType*>(this)->emitHeader(); }
  void emitIfElse(If* curr) { static_cast<SubType*>(this)->emitIfElse(curr); }
  void emitCatch(Try* curr, Index i) {
    static_cast<SubType*>(this)->emitCatch(curr, i);
  }
  void emitCatchAll(Try* curr) {
    static_cast<SubType*>(this)->emitCatchAll(curr);
  }
  void emitDelegate(Try* curr) {
    static_cast<SubType*>(this)->emitDelegate(curr);
  }
  void emitScopeEnd(Expression* curr) {
    static_cast<SubType*>(this)->emitScopeEnd(curr);
  }
  void emitFunctionEnd() { static_cast<SubType*>(this)->emitFunctionEnd(); }
  void emitUnreachable() { static_cast<SubType*>(this)->emitUnreachable(); }
  void emitDebugLocation(Expression* curr) {
    static_cast<SubType*>(this)->emitDebugLocation(curr);
  }

  // emits a node, but if it is a block with no name, emit a list of its
  // contents
  void visitPossibleBlockContents(Expression* curr);
};
// Top-level driver: header, then the body (flattening an unnamed top-level
// block), then the function end marker.
template<typename SubType> void BinaryenIRWriter<SubType>::write() {
  assert(func && "BinaryenIRWriter: function is not set");
  emitHeader();
  visitPossibleBlockContents(func->body);
  emitFunctionEnd();
}
// emits a node, but if it is a block with no name, emit a list of its contents
template<typename SubType>
void BinaryenIRWriter<SubType>::visitPossibleBlockContents(Expression* curr) {
  // Only an unnamed block - one that is never branched to - can be flattened
  // into the enclosing scope. Anything else is emitted normally.
  auto* block = curr->dynCast<Block>();
  bool mustKeepBlock =
    !block || BranchUtils::BranchSeeker::has(block, block->name);
  if (mustKeepBlock) {
    visit(curr);
    return;
  }
  // Emit the block's children directly, without the wrapping block.
  for (auto* child : block->list) {
    visit(child);
    if (child->type == Type::unreachable) {
      // Since this child was unreachable, either this child or one of its
      // descendants was a source of unreachability that was actually
      // emitted. Subsequent children won't be reachable, so skip them.
      return;
    }
  }
}
template<typename SubType>
void BinaryenIRWriter<SubType>::visit(Expression* curr) {
  emitDebugLocation(curr);
  // Operands go on the stack before their consumer, so emit the value
  // children first. We emit unreachable instructions that create
  // unreachability, but not unreachable instructions that just inherit
  // unreachability from their children, since the latter won't be reached.
  // This (together with logic in the control flow visitors) also ensures that
  // the final instruction in each unreachable block is a source of
  // unreachability, which means we don't need to emit an extra `unreachable`
  // before the end of the block to prevent type errors.
  for (auto* child : ValueChildIterator(curr)) {
    visit(child);
    if (child->type == Type::unreachable) {
      // `curr` is not reachable, so don't emit it (or any remaining
      // children).
      return;
    }
  }
  // Control flow requires special handling, but most instructions can be
  // emitted directly after their children.
  if (!Properties::isControlFlowStructure(curr)) {
    emit(curr);
    return;
  }
  Visitor<BinaryenIRWriter>::visit(curr);
}
template<typename SubType>
void BinaryenIRWriter<SubType>::visitBlock(Block* curr) {
  // Emit the children of a block starting at index `from`, stopping early if
  // a child is unreachable (nothing after it would ever execute).
  auto visitChildren = [this](Block* curr, Index from) {
    auto& list = curr->list;
    while (from < list.size()) {
      auto* child = list[from];
      visit(child);
      if (child->type == Type::unreachable) {
        break;
      }
      ++from;
    }
  };
  // Close the block's scope, adding a trailing `unreachable` if needed.
  auto afterChildren = [this](Block* curr) {
    emitScopeEnd(curr);
    if (curr->type == Type::unreachable) {
      // Since this block is unreachable, no instructions will be emitted after
      // it in its enclosing scope. That means that this block will be the last
      // instruction before the end of its parent scope, so its type must match
      // the type of its parent. But we don't have a concrete type for this
      // block and we don't know what type its parent expects, so we can't
      // ensure the types match. To work around this, we insert an `unreachable`
      // instruction after every unreachable control flow structure and depend
      // on its polymorphic behavior to paper over any type mismatches.
      emitUnreachable();
    }
  };
  // Handle very deeply nested blocks in the first position efficiently,
  // avoiding heavy recursion. We only start to do this if we see it will help
  // us (to avoid allocation of the vector).
  if (!curr->list.empty() && curr->list[0]->is<Block>()) {
    std::vector<Block*> parents;
    Block* child;
    // Walk down the chain of first-position blocks, emitting each block's
    // begin marker as we go and remembering the parents for later.
    while (!curr->list.empty() && (child = curr->list[0]->dynCast<Block>())) {
      parents.push_back(curr);
      emit(curr);
      curr = child;
    }
    // Emit the current block, which does not have a block as a child in the
    // first position.
    emit(curr);
    visitChildren(curr, 0);
    afterChildren(curr);
    bool childUnreachable = curr->type == Type::unreachable;
    // Finish the later parts of all the parent blocks, innermost first. Each
    // parent's first child (the block we just finished) is already emitted,
    // so resume from index 1 - unless that child was unreachable, in which
    // case the rest of the parent's children are never reached.
    while (!parents.empty()) {
      auto* parent = parents.back();
      parents.pop_back();
      if (!childUnreachable) {
        visitChildren(parent, 1);
      }
      afterChildren(parent);
      childUnreachable = parent->type == Type::unreachable;
    }
    return;
  }
  // Simple case of not having a nested block in the first position.
  emit(curr);
  visitChildren(curr, 0);
  afterChildren(curr);
}
template<typename SubType> void BinaryenIRWriter<SubType>::visitIf(If* curr) {
  // Note that the condition was already emitted by visit() as a value child;
  // here we emit the if itself, its arms, and the scope end.
  emit(curr);
  visitPossibleBlockContents(curr->ifTrue);
  if (curr->ifFalse) {
    emitIfElse(curr);
    visitPossibleBlockContents(curr->ifFalse);
  }
  emitScopeEnd(curr);
  if (curr->type != Type::unreachable) {
    return;
  }
  // We already handled the case of the condition being unreachable in
  // `visit`. Otherwise, we may still be unreachable, if we are an if-else
  // with both sides unreachable. Just like with blocks, we emit an extra
  // `unreachable` to work around potential type mismatches.
  assert(curr->ifFalse);
  emitUnreachable();
}
template<typename SubType>
void BinaryenIRWriter<SubType>::visitLoop(Loop* curr) {
  // Emit the loop begin marker, its body (flattened if unnamed), and the end.
  emit(curr);
  visitPossibleBlockContents(curr->body);
  emitScopeEnd(curr);
  if (curr->type != Type::unreachable) {
    return;
  }
  // we emitted a loop without a return type, so it must not be consumed
  emitUnreachable();
}
template<typename SubType> void BinaryenIRWriter<SubType>::visitTry(Try* curr) {
  emit(curr);
  visitPossibleBlockContents(curr->body);
  // One catch clause per tag, in order, each followed by its body.
  for (Index i = 0; i < curr->catchTags.size(); ++i) {
    emitCatch(curr, i);
    visitPossibleBlockContents(curr->catchBodies[i]);
  }
  // A catch_all, if present, uses the final catch body.
  if (curr->hasCatchAll()) {
    emitCatchAll(curr);
    visitPossibleBlockContents(curr->catchBodies.back());
  }
  if (!curr->isDelegate()) {
    emitScopeEnd(curr);
  } else {
    // Note that when we emit a delegate we do not need to also emit a scope
    // ending, as the delegate ends the scope.
    emitDelegate(curr);
  }
  // As with other control flow, paper over any type mismatch from an
  // unreachable try with an extra `unreachable`.
  if (curr->type == Type::unreachable) {
    emitUnreachable();
  }
}
// Binaryen IR to binary writer: drives BinaryenIRWriter's traversal and
// forwards each emit* callback to a BinaryInstWriter, which appends the
// actual bytes to the output buffer.
class BinaryenIRToBinaryWriter
  : public BinaryenIRWriter<BinaryenIRToBinaryWriter> {
public:
  BinaryenIRToBinaryWriter(WasmBinaryWriter& parent,
                           BufferWithRandomAccess& o,
                           Function* func = nullptr,
                           bool sourceMap = false,
                           bool DWARF = false)
    : BinaryenIRWriter<BinaryenIRToBinaryWriter>(func), parent(parent),
      writer(parent, o, func, sourceMap, DWARF), sourceMap(sourceMap) {}

  void visit(Expression* curr) {
    BinaryenIRWriter<BinaryenIRToBinaryWriter>::visit(curr);
  }

  void emit(Expression* curr) { writer.visit(curr); }
  void emitHeader() {
    // Write the function's prolog debug location (if any) before the local
    // declarations.
    if (func->prologLocation.size()) {
      parent.writeDebugLocation(*func->prologLocation.begin());
    }
    writer.mapLocalsAndEmitHeader();
  }
  void emitIfElse(If* curr) { writer.emitIfElse(curr); }
  void emitCatch(Try* curr, Index i) { writer.emitCatch(curr, i); }
  void emitCatchAll(Try* curr) { writer.emitCatchAll(curr); }
  void emitDelegate(Try* curr) { writer.emitDelegate(curr); }
  void emitScopeEnd(Expression* curr) { writer.emitScopeEnd(curr); }
  void emitFunctionEnd() {
    // Write the function's epilog debug location (if any) before the end
    // marker.
    if (func->epilogLocation.size()) {
      parent.writeDebugLocation(*func->epilogLocation.begin());
    }
    writer.emitFunctionEnd();
  }
  void emitUnreachable() { writer.emitUnreachable(); }
  void emitDebugLocation(Expression* curr) {
    // Source-map locations are emitted here; the non-source-map (DWARF) case
    // is handled inside BinaryInstWriter::visit.
    if (sourceMap) {
      parent.writeDebugLocation(curr, func);
    }
  }

  MappedLocals& getMappedLocals() { return writer.mappedLocals; }

private:
  WasmBinaryWriter& parent;
  BinaryInstWriter writer;
  bool sourceMap;
};
// Binaryen IR to stack IR converter
// Queues the expressions linearly in Stack IR (SIR), translating each emit*
// callback into the corresponding StackInst marker.
class StackIRGenerator : public BinaryenIRWriter<StackIRGenerator> {
public:
  StackIRGenerator(Module& module, Function* func)
    : BinaryenIRWriter<StackIRGenerator>(func), module(module) {}

  void emit(Expression* curr);
  void emitScopeEnd(Expression* curr);
  // Stack IR has no separate function header or end markers.
  void emitHeader() {}
  void emitIfElse(If* curr) {
    stackIR.push_back(makeStackInst(StackInst::IfElse, curr));
  }
  void emitCatch(Try* curr, Index i) {
    stackIR.push_back(makeStackInst(StackInst::Catch, curr));
  }
  void emitCatchAll(Try* curr) {
    stackIR.push_back(makeStackInst(StackInst::CatchAll, curr));
  }
  void emitDelegate(Try* curr) {
    stackIR.push_back(makeStackInst(StackInst::Delegate, curr));
  }
  void emitFunctionEnd() {}
  void emitUnreachable() {
    // Materialize a fresh `unreachable` expression to back the instruction,
    // since there is no existing IR node for it.
    stackIR.push_back(makeStackInst(Builder(module).makeUnreachable()));
  }
  // Debug locations are not tracked in Stack IR.
  void emitDebugLocation(Expression* curr) {}

  StackIR& getStackIR() { return stackIR; }

private:
  StackInst* makeStackInst(StackInst::Op op, Expression* origin);
  // Convenience overload for plain (non-control-flow) instructions.
  StackInst* makeStackInst(Expression* origin) {
    return makeStackInst(StackInst::Basic, origin);
  }

  Module& module;
  StackIR stackIR; // filled in write()
};
// Stack IR to binary writer: walks a function's already-generated Stack IR
// and emits each instruction through a BinaryInstWriter. Note that source
// maps and DWARF are not supported on this path (both flags are forced off).
class StackIRToBinaryWriter {
public:
  StackIRToBinaryWriter(WasmBinaryWriter& parent,
                        BufferWithRandomAccess& o,
                        Function* func)
    : writer(parent, o, func, false /* sourceMap */, false /* DWARF */),
      func(func) {}

  void write();

  MappedLocals& getMappedLocals() { return writer.mappedLocals; }

private:
  BinaryInstWriter writer;
  Function* func;
};
} // namespace wasm
#endif // wasm_stack_h