/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Optimizes heap (GC) stores.
//
// TODO: Add dead store elimination / load forwarding here.
//
#include "cfg/cfg-traversal.h"
#include "ir/effects.h"
#include "ir/local-graph.h"
#include "pass.h"
#include "wasm-builder.h"
#include "wasm.h"
namespace wasm {
namespace {
// In each basic block we will store the relevant heap store operations and
// other actions that matter to our analysis.
struct Info {
std::vector<Expression**> actions;
};
struct HeapStoreOptimization
: public WalkerPass<
CFGWalker<HeapStoreOptimization, Visitor<HeapStoreOptimization>, Info>> {
bool isFunctionParallel() override { return true; }
// Locals are not modified here.
bool requiresNonNullableLocalFixups() override { return false; }
std::unique_ptr<Pass> create() override {
return std::make_unique<HeapStoreOptimization>();
}
// Branches outside of the function can be ignored, as we only look at local
// state in the function. (This may need to change if we do more general dead
// store elimination.)
bool ignoreBranchesOutsideOfFunc = true;
// Store struct.sets and blocks, as we can find patterns among those.
void addAction() {
if (currBasicBlock) {
currBasicBlock->contents.actions.push_back(getCurrentPointer());
}
}
void visitStructSet(StructSet* curr) { addAction(); }
void visitBlock(Block* curr) { addAction(); }
void visitFunction(Function* curr) {
// Now that the walk is complete and we have a CFG, find things to optimize.
for (auto& block : basicBlocks) {
for (auto** currp : block->contents.actions) {
auto* curr = *currp;
if (auto* set = curr->dynCast<StructSet>()) {
optimizeStructSet(set, currp);
} else if (auto* block = curr->dynCast<Block>()) {
optimizeBlock(block);
} else {
WASM_UNREACHABLE("bad action");
}
}
}
}
// Optimize a struct.set. Also receives a pointer to the place that refers to
// it, so we can replace it (which we do if we optimize).
void optimizeStructSet(StructSet* curr, Expression** currp) {
// If our reference is a tee of a struct.new, we may be able to fold the
// stored value into the new itself:
//
// (struct.set (local.tee $x (struct.new X Y Z)) X')
// =>
// (local.set $x (struct.new X' Y Z))
//
if (auto* tee = curr->ref->dynCast<LocalSet>()) {
if (auto* new_ = tee->value->dynCast<StructNew>()) {
if (optimizeSubsequentStructSet(new_, curr, tee)) {
// Success, so we no longer need the struct.set; the tee becomes a plain
// local.set and replaces the struct.set here.
tee->makeSet();
*currp = tee;
}
}
}
}
// Similar to the above case of a struct.set whose reference is a tee of a
// new, we can do the same for subsequent struct.sets in a block's list:
//
// (local.set $x (struct.new X Y Z))
// (struct.set (local.get $x) X')
// =>
// (local.set $x (struct.new X' Y Z))
//
// We also handle further struct.sets that immediately follow this one. If
// the instruction following the new is not a matching struct.set, we try to
// push the new down past it, as shown in the sketch below.
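//
// For example, an illustrative sketch (with $y a hypothetical unrelated
// local whose set has no interfering effects): the new is pushed past the
// intervening instruction, and the struct.set is then folded into it and
// replaced by a nop:
//
// (local.set $x (struct.new X Y Z))
// (local.set $y (i32.const 1))
// (struct.set (local.get $x) X')
// =>
// (local.set $y (i32.const 1))
// (local.set $x (struct.new X' Y Z))
// (nop)
//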
void optimizeBlock(Block* curr) {
auto& list = curr->list;
for (Index i = 0; i < list.size(); i++) {
auto* localSet = list[i]->dynCast<LocalSet>();
if (!localSet) {
continue;
}
auto* new_ = localSet->value->dynCast<StructNew>();
if (!new_) {
continue;
}
// This local.set of a struct.new looks good. Find struct.sets after it to
// optimize.
Index localSetIndex = i;
for (Index j = localSetIndex + 1; j < list.size(); j++) {
// Check that the next instruction is a struct.set whose reference is a
// local.get of the same local that the struct.new was stored into.
auto* structSet = list[j]->dynCast<StructSet>();
auto* localGet =
structSet ? structSet->ref->dynCast<LocalGet>() : nullptr;
if (!structSet || !localGet || localGet->index != localSet->index) {
// Any time the pattern no longer matches, we try to push the struct.new
// further down; if that is not possible, we stop looking for further
// struct.sets to optimize for this struct.new.
if (trySwap(list, localSetIndex, j)) {
// Update the index and continue to try again.
localSetIndex = j;
continue;
}
break;
}
// The pattern matches, try to optimize.
if (!optimizeSubsequentStructSet(new_, structSet, localSet)) {
break;
} else {
// Success. Replace the set with a nop, and continue to perhaps
// optimize more.
ExpressionManipulator::nop(structSet);
}
}
}
}
// Helper function for optimizeBlock. Tries pushing the struct.new at
// index i down to index j, swapping it with the instruction already at j, so
// that it is closer to (potential) later struct.sets.
bool trySwap(ExpressionList& list, Index i, Index j) {
if (j == list.size() - 1) {
// There is no reason to swap with the last element of the list, as it
// won't match the pattern: there is nothing after it. This also avoids
// swapping an instruction that leaves nothing on the stack with one that
// could leave something, which would be incorrect.
return false;
}
auto* otherSet = list[j]->dynCast<LocalSet>();
if (otherSet && otherSet->value->is<StructNew>()) {
// Don't swap two struct.new instructions, to avoid going back and forth.
return false;
}
// Check if the two expressions can be swapped safely considering their
// effects.
auto firstEffects = effects(list[i]);
auto secondEffects = effects(list[j]);
if (secondEffects.invalidates(firstEffects)) {
return false;
}
std::swap(list[i], list[j]);
return true;
}
// Given a struct.new and a struct.set that occurs right after it, and that
// applies to the same data, try to apply the set during the new. This can be
// either with a nested tee:
//
// (struct.set
// (local.tee $x (struct.new X Y Z))
// X'
// )
// =>
// (local.set $x (struct.new X' Y Z))
//
// or without:
//
// (local.set $x (struct.new X Y Z))
// (struct.set (local.get $x) X')
// =>
// (local.set $x (struct.new X' Y Z))
//
// Returns true if we succeeded.
bool optimizeSubsequentStructSet(StructNew* new_,
StructSet* set,
LocalSet* localSet) {
// Leave unreachable code for DCE, to avoid updating types here.
if (new_->type == Type::unreachable || set->type == Type::unreachable) {
return false;
}
auto index = set->index;
auto& operands = new_->operands;
auto refLocalIndex = localSet->index;
// Check for effects that prevent us moving the struct.set's value (X' in
// the function comment) into its new position in the struct.new. First, it
// must be ok to move it past the local.set (otherwise, it might read from
// memory using that local, and depend on the struct.new having already
// occurred; or, if it writes to that local, then it would cross another
// write).
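//
// For example, a sketch of the hazard (if X' itself reads $x):
//
// (local.set $x (struct.new X Y Z))
// (struct.set (local.get $x) (..local.get $x..))
//
// Folding X' into the struct.new would make it execute before the
// local.set, so it would read a stale value of $x.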
auto setValueEffects = effects(set->value);
if (setValueEffects.localsRead.count(refLocalIndex) ||
setValueEffects.localsWritten.count(refLocalIndex)) {
return false;
}
// We must move the set's value past the operands at greater indexes (Y and Z
// in the example in the comment on this function). If this is not
// with_default then we must check those operands for interfering effects.
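// For example, a sketch of the hazard: if X' is a call that writes a global
// and Z is a global.get of that same global, then moving X' before Z would
// change the value that Z reads.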
// TODO When this function is called repeatedly in a sequence this can
// become quadratic - perhaps we should memoize (though, struct sizes
// tend to not be ridiculously large).
if (!new_->isWithDefault()) {
for (Index i = index + 1; i < operands.size(); i++) {
auto operandEffects = effects(operands[i]);
if (operandEffects.invalidates(setValueEffects)) {
// TODO: we could use locals to reorder everything
return false;
}
}
}
// We must also be careful of branches out from the value that skip the
// local.set, see below.
if (canSkipLocalSet(set, setValueEffects, localSet)) {
return false;
}
// We can optimize here!
Builder builder(*getModule());
// If this was with_default then we add explicit default values now. That can
// increase code size in some cases (if the struct has many fields and few
// sets end up removed), but in general this optimization is worth it.
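//
// For example, an illustrative sketch, writing to the first of two i32
// fields of a hypothetical type $T:
//
// (local.set $x (struct.new_default $T))
// (struct.set (local.get $x) (i32.const 7))
// =>
// (local.set $x (struct.new $T (i32.const 7) (i32.const 0)))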
if (new_->isWithDefault()) {
auto& fields = new_->type.getHeapType().getStruct().fields;
for (auto& field : fields) {
auto zero = Literal::makeZero(field.type);
operands.push_back(builder.makeConstantExpression(zero));
}
}
// See if we need to keep the old value: if it has side effects that we
// cannot remove, we must still execute it, dropped, before the new value.
if (effects(operands[index]).hasUnremovableSideEffects()) {
operands[index] =
builder.makeSequence(builder.makeDrop(operands[index]), set->value);
} else {
operands[index] = set->value;
}
return true;
}
// We must be careful of branches that skip the local.set. For example:
//
// (block $out
// (local.set $x (struct.new X Y Z))
// (struct.set (local.get $x) (..br $out..)) ;; X' here has a br
// )
// ..local.get $x..
//
// Note how we do the local.set first. Imagine we optimized to this:
//
// (block $out
// (local.set $x (struct.new (..br $out..) Y Z))
// )
// ..local.get $x..
//
// Now the br happens first, skipping the local.set entirely, and the use
// later down will not get the proper value. This is the problem we must
// detect here.
//
// We are given a struct.set, the computed effects of its value (the caller
// already has those, so this is an optimization to avoid recomputation), and
// the local.set.
bool canSkipLocalSet(StructSet* set,
const EffectAnalyzer& setValueEffects,
LocalSet* localSet) {
// First, if the set's value cannot branch at all, then we have no problem.
if (!setValueEffects.transfersControlFlow()) {
return false;
}
// We may branch, so do an analysis using a LocalGraph. We will check
// whether it is valid to move the local.set to where the struct.set is, so
// we provide StructSet as the query class.
//
// It is valid to reuse the LocalGraph many times because the optimization
// that we do in this pass does not generate new, dangerous control flow. We
// only optimize if moving a LocalSet is valid, and that does not invalidate
// any other one.
if (!localGraph) {
localGraph.emplace(getFunction(), getModule(), StructSet::SpecificId);
}
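// canMoveSet computes which local.gets would be affected if the local.set
// were moved to the struct.set's position; if there are none, the move is
// safe.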
auto badGets = localGraph->canMoveSet(localSet, set);
if (badGets.size() == 0) {
// No problems at all.
return false;
}
// It is ok for the only problematic local.get to be the one in the
// struct.set itself, since if we optimize then that local.get goes away
// anyhow; that is,
//
// (local.set $x (struct.new X Y Z))
// (struct.set (local.get $x) (X'))
// =>
// (local.set $x (struct.new X' Y Z))
//
// Both the local.get and the struct.set are removed.
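// With two or more problematic gets, at least one of them is not the
// struct.set's own reference, so we cannot optimize; with exactly one, we
// can optimize only if it is that reference.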
if (badGets.size() >= 2) {
return true;
}
return *badGets.begin() != set->ref;
}
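// Helper to compute the effects of an expression, using the current pass
// options and module.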
EffectAnalyzer effects(Expression* expr) {
return EffectAnalyzer(getPassOptions(), *getModule(), expr);
}
private:
// A local graph that is constructed the first time we need it.
std::optional<LazyLocalGraph> localGraph;
};
} // anonymous namespace
Pass* createHeapStoreOptimizationPass() { return new HeapStoreOptimization(); }
} // namespace wasm