/*
* Copyright 2022 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Finds types which are only created in assignments to immutable globals. For
// such types we can replace a struct.get with a global.get when there is a
// single possible global, or if there are two then with this pattern:
//
// (struct.get $foo i
// (..ref..))
// =>
// (select
// (value1)
// (value2)
// (ref.eq
// (..ref..)
// (global.get $global1)))
//
// That is a valid transformation if there are only two struct.news of $foo,
// both in immutable globals $global1 and $global2, the field is immutable,
// the values of field |i| in them are value1 and value2 respectively, and
// $foo has no subtypes. In that situation, the reference must
// be one of those two, so we can compare the reference to the globals and pick
// the right value there. (We can also handle subtypes if we look at their
// values as well; see below.)
//
// The benefit of this optimization is primarily in the case of constant values
// that we can heavily optimize, like function references (constant function
// refs let us inline, etc.). Function references cannot be compared directly,
// so we cannot use ConstantFieldPropagation or an extension of it to multiple
// values: the select pattern shown above cannot be used there, as it needs a
// comparison, and no comparison is possible on the function references
// themselves. But we can compare struct references, so if the function
// references are in
// vtables, and the vtables follow the above pattern, then we can optimize.
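//
// For example, here is a hypothetical vtable pattern (all names are
// illustrative):
//
//  (global $vtable.A (struct.new $vtable (ref.func $A.foo)))
//  (global $vtable.B (struct.new $vtable (ref.func $B.foo)))
//
// A struct.get of field 0 on a (ref $vtable) can then become a select between
// the two constant ref.funcs, which later passes may turn into direct calls
// and then inline.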
//
// TODO: Only do the case with a select when shrinkLevel == 0?
//
#include <algorithm>
#include <variant>
#include "ir/bits.h"
#include "ir/debuginfo.h"
#include "ir/find_all.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/possible-constant.h"
#include "ir/subtypes.h"
#include "ir/utils.h"
#include "pass.h"
#include "wasm-builder.h"
#include "wasm.h"
namespace wasm {
namespace {
struct GlobalStructInference : public Pass {
// Only modifies struct.get operations.
bool requiresNonNullableLocalFixups() override { return false; }
// Maps optimizable struct types to the globals whose init is a struct.new of
// them.
//
  // We will remove unoptimizable types from here, so in practice a type has an
  // entry here if and only if it is optimizable.
std::unordered_map<HeapType, std::vector<Name>> typeGlobals;
void run(Module* module) override {
if (!module->features.hasGC()) {
return;
}
if (!getPassOptions().closedWorld) {
Fatal() << "GSI requires --closed-world";
}
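    // (The closed-world assumption matters here: in an open world, code
    // outside the module could create more instances of the types we optimize,
    // which would invalidate the inferences below.)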
// First, find all the information we need. We need to know which struct
// types are created in functions, because we will not be able to optimize
// those.
using HeapTypes = std::unordered_set<HeapType>;
ModuleUtils::ParallelFunctionAnalysis<HeapTypes> analysis(
*module, [&](Function* func, HeapTypes& types) {
if (func->imported()) {
return;
}
for (auto* structNew : FindAll<StructNew>(func->body).list) {
auto type = structNew->type;
if (type.isRef()) {
types.insert(type.getHeapType());
}
}
});
    // We cannot optimize types that appear in a struct.new in a function. We
    // just collected those types; merge them now.
HeapTypes unoptimizable;
for (auto& [func, types] : analysis.map) {
for (auto type : types) {
unoptimizable.insert(type);
}
}
// Process the globals.
for (auto& global : module->globals) {
if (global->imported()) {
continue;
}
// We cannot optimize a type that appears in a non-toplevel location in a
// global init.
for (auto* structNew : FindAll<StructNew>(global->init).list) {
auto type = structNew->type;
if (type.isRef() && structNew != global->init) {
unoptimizable.insert(type.getHeapType());
}
}
if (!global->init->type.isRef()) {
continue;
}
auto type = global->init->type.getHeapType();
// The global's declared type must match the init's type. If not, say if
// we had a global declared as type |any| but that contains (ref $A), then
// that is not something we can optimize, as ref.eq on a global.get of
// that global will not validate. (This should not be a problem after
// GlobalSubtyping runs, which will specialize the type of the global.)
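      // For example (a hypothetical global):
      //
      //  (global $g anyref (struct.new $A))
      //
      // Here the declared type |any| does not match the init's type (ref $A),
      // and a ref.eq on a global.get of $g would not validate, as ref.eq
      // requires eqref operands.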
if (global->type != global->init->type) {
unoptimizable.insert(type);
continue;
}
// We cannot optimize mutable globals.
if (global->mutable_) {
unoptimizable.insert(type);
continue;
}
// Finally, if this is a struct.new then it is one we can optimize; note
// it.
if (global->init->is<StructNew>()) {
typeGlobals[type].push_back(global->name);
}
}
// A struct.get might also read from any of the subtypes. As a result, an
// unoptimizable type makes all its supertypes unoptimizable as well.
// TODO: this could be specific per field (and not all supers have all
// fields)
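    // For example (hypothetical types): if $B is a subtype of $A and some $B
    // is created in a function, then a struct.get of $A might be reading from
    // that $B instance, so $A becomes unoptimizable as well.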
// Iterate on a copy to avoid invalidation as we insert.
auto unoptimizableCopy = unoptimizable;
for (auto type : unoptimizableCopy) {
while (1) {
unoptimizable.insert(type);
// Also erase the globals, as we will never read them anyhow. This can
// allow us to skip unneeded work, when we check if typeGlobals is
// empty, below.
typeGlobals.erase(type);
auto super = type.getDeclaredSuperType();
if (!super) {
break;
}
type = *super;
}
}
    // Similarly, propagate global names: if one type has [global1], then a
    // struct.get of any of its supertypes might read from that global, so
    // propagate the name to them as well.
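    // For example (hypothetical types): if $B is a subtype of $A and
    // typeGlobals[$B] is [$g], then a struct.get of $A may be reading from
    // $g's struct, so $g must be listed in typeGlobals[$A] as well.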
auto typeGlobalsCopy = typeGlobals;
for (auto& [type, globals] : typeGlobalsCopy) {
auto curr = type;
while (1) {
auto super = curr.getDeclaredSuperType();
if (!super) {
break;
}
curr = *super;
// As above, avoid adding pointless data for anything unoptimizable.
if (!unoptimizable.count(curr)) {
for (auto global : globals) {
typeGlobals[curr].push_back(global);
}
}
}
}
if (typeGlobals.empty()) {
// We found nothing we can optimize.
return;
}
// The above loop on typeGlobalsCopy is on an unsorted data structure, and
// that can lead to nondeterminism in typeGlobals. Sort the vectors there to
// ensure determinism.
for (auto& [type, globals] : typeGlobals) {
std::sort(globals.begin(), globals.end());
}
// We are looking for the case where we can pick between two values using a
// single comparison. More than two values, or more than a single
// comparison, lead to tradeoffs that may not be worth it.
//
    // Note that this situation may involve more than two globals. For example,
    // we may have three relevant globals where two have the same value. In
    // that case we can compare against the third:
//
// $global0: (struct.new $Type (i32.const 42))
// $global1: (struct.new $Type (i32.const 42))
// $global2: (struct.new $Type (i32.const 1337))
//
// (struct.get $Type (ref))
// =>
// (select
// (i32.const 1337)
// (i32.const 42)
    //   (ref.eq (ref) (global.get $global2)))
//
// To discover these situations, we compute and group the possible values
// that can be read from a particular struct.get, using the following data
// structure.
struct Value {
      // A value is either a constant or, if it is not constant, a pointer to
      // the expression it comes from.
std::variant<PossibleConstantValues, Expression*> content;
// The list of globals that have this Value. In the example from above,
// the Value for 42 would list globals = [$global0, $global1].
// TODO: SmallVector?
std::vector<Name> globals;
bool isConstant() const {
return std::get_if<PossibleConstantValues>(&content);
}
const PossibleConstantValues& getConstant() const {
assert(isConstant());
return std::get<PossibleConstantValues>(content);
}
Expression* getExpression() const {
assert(!isConstant());
return std::get<Expression*>(content);
}
};
// Constant expressions are easy to handle, and we can emit a select as in
// the last example. But we can handle non-constant ones too, by un-nesting
// the relevant global. Imagine we have this:
//
    //  (global $g (struct.new $S
    //    (struct.new $T ..)))
//
// We have a nested struct.new here. That is not a constant value, but we
// can turn it into a global.get:
//
    //  (global $g.nested (struct.new $T ..))
    //  (global $g (struct.new $S
    //    (global.get $g.nested)))
//
// After this un-nesting we end up with a global.get of an immutable global,
// which is constant. Note that this adds a global and may increase code
    // size slightly, but if it lets us infer constant values then that may
    // lead to devirtualization and other large benefits. Later passes can also
    // re-nest.
//
// We do most of our optimization work in parallel, but we cannot add
// globals in parallel, so instead we note the places we need to un-nest in
// this data structure and process them at the end.
struct GlobalToUnnest {
// The global we want to refer to a nested part of, by un-nesting it. The
// global contains a struct.new, and we want to refer to one of the
// operands of the struct.new directly, which we can do by moving it out
// to its own new global.
Name global;
      // The index of the operand we care about in the struct.new stored in
      // the global named |global|.
Index index;
// The global.get that should refer to the new global. At the end, after
// we create a new global and have a name for it, we update this get to
// point to it.
GlobalGet* get;
};
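    // For example (hypothetical), given
    //
    //  (global $g (struct.new $S (i32.const 1) (struct.new $T ..)))
    //
    // a GlobalToUnnest of {$g, 1, get} asks us to move the nested struct.new
    // $T out into its own new global, and to point |get| at that new global.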
using GlobalsToUnnest = std::vector<GlobalToUnnest>;
struct FunctionOptimizer : PostWalker<FunctionOptimizer> {
private:
GlobalStructInference& parent;
GlobalsToUnnest& globalsToUnnest;
public:
FunctionOptimizer(GlobalStructInference& parent,
GlobalsToUnnest& globalsToUnnest)
: parent(parent), globalsToUnnest(globalsToUnnest) {}
bool refinalize = false;
void visitStructGet(StructGet* curr) {
auto type = curr->ref->type;
if (type == Type::unreachable) {
return;
}
// We must ignore the case of a non-struct heap type, that is, a bottom
// type (which is all that is left after we've already ruled out
// unreachable). Such things will not be in typeGlobals, which we are
// checking now anyhow.
auto heapType = type.getHeapType();
auto iter = parent.typeGlobals.find(heapType);
if (iter == parent.typeGlobals.end()) {
return;
}
// This cannot be a bottom type as we found it in the typeGlobals map,
// which only contains types of struct.news.
assert(heapType.isStruct());
// The field must be immutable.
auto fieldIndex = curr->index;
auto& field = heapType.getStruct().fields[fieldIndex];
if (field.mutable_ == Mutable) {
return;
}
const auto& globals = iter->second;
        if (globals.empty()) {
return;
}
auto& wasm = *getModule();
Builder builder(wasm);
if (globals.size() == 1) {
// Leave it to other passes to infer the constant value of the field,
// if there is one: just change the reference to the global, which
// will unlock those other optimizations. Note we must trap if the ref
// is null, so add RefAsNonNull here.
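          // Concretely, the rewrite is the following (a sketch, where $g is
          // the single relevant global):
          //
          //  (struct.get $T i (..ref..))
          // =>
          //  (struct.get $T i
          //   (block
          //    (drop (ref.as_non_null (..ref..)))
          //    (global.get $g)))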
auto global = globals[0];
auto globalType = wasm.getGlobal(global)->type;
if (globalType != curr->ref->type) {
// The struct.get will now read from something of the type of the
// global, which is different, so the field being read might be
// refined, which could change the struct.get's type.
refinalize = true;
}
// No need to worry about atomic gets here. We will still read from
// the same memory location as before and preserve all side effects
// (including synchronization) that were previously present. The
// memory location is immutable anyway, so there cannot be any writes
// to synchronize with in the first place.
curr->ref = builder.makeSequence(
builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)),
builder.makeGlobalGet(global, globalType));
return;
}
// TODO: SmallVector?
std::vector<Value> values;
// Scan the relevant struct.new operands.
auto fieldType = field.type;
for (Index i = 0; i < globals.size(); i++) {
Name global = globals[i];
auto* structNew = wasm.getGlobal(global)->init->cast<StructNew>();
// The value that is read from this struct.new.
Value value;
// Find the value read from the struct and represent it as a Value.
PossibleConstantValues constant;
if (structNew->isWithDefault()) {
constant.note(Literal::makeZero(fieldType));
value.content = constant;
} else {
Expression* operand = structNew->operands[fieldIndex];
constant.note(operand, wasm);
if (constant.isConstant()) {
value.content = constant;
} else {
value.content = operand;
}
}
// If the value is constant, it may be grouped as mentioned before.
// See if it matches anything we've seen before.
bool grouped = false;
if (value.isConstant()) {
for (auto& oldValue : values) {
if (oldValue.isConstant() &&
oldValue.getConstant() == value.getConstant()) {
// Add us to this group.
oldValue.globals.push_back(global);
grouped = true;
break;
}
}
}
if (!grouped) {
// This is a new value, so create a new group, unless we've seen too
// many unique values. In that case, give up.
if (values.size() == 2) {
return;
}
value.globals.push_back(global);
values.push_back(value);
}
}
// Helper for optimization: Given a Value, returns what we should read
// for it.
auto getReadValue = [&](const Value& value) -> Expression* {
Expression* ret;
if (value.isConstant()) {
            // This is known to be a constant, so simply emit an expression
            // for that constant, handling the case of a packed field.
auto* expr = value.getConstant().makeExpression(wasm);
ret = Bits::makePackedFieldGet(expr, field, curr->signed_, wasm);
} else {
// Otherwise, this is non-constant, so we are in the situation where
            // we want to un-nest the value out of the struct.new it is in.
            // Note it for later work, as we cannot add a global in parallel.
// There can only be one global in a value that is not constant,
// which is the global we want to read from.
assert(value.globals.size() == 1);
            // Create a global.get with a temporary name, leaving only the
            // update of the name to later work.
auto* get = builder.makeGlobalGet(value.globals[0],
value.getExpression()->type);
globalsToUnnest.emplace_back(
GlobalToUnnest{value.globals[0], fieldIndex, get});
ret = get;
}
// This value replaces the struct.get, so it should have the same
// source location.
debuginfo::copyOriginalToReplacement(curr, ret, getFunction());
return ret;
};
        // We have some globals (at least two), and so must have at least one
        // value. And we have already exited if we have more than two values
        // (see the early return above), so that leaves only one or two values.
if (values.size() == 1) {
// The case of 1 value is simple: trap if the ref is null, and
// otherwise return the value. Since the field is immutable, there
          // cannot have been any writes to it that we must synchronize with,
          // so we do not need a fence.
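          // That is (a sketch):
          //
          //  (struct.get $T i (..ref..))
          // =>
          //  (block
          //   (drop (ref.as_non_null (..ref..)))
          //   (..the value..))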
replaceCurrent(builder.makeSequence(
builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)),
getReadValue(values[0])));
return;
}
assert(values.size() == 2);
// We have two values. Check that we can pick between them using a
// single comparison. While doing so, ensure that the index we can check
// on is 0, that is, the first value has a single global.
if (values[0].globals.size() == 1) {
// The checked global is already in index 0.
} else if (values[1].globals.size() == 1) {
// Flip so the value to check is in index 0.
std::swap(values[0], values[1]);
} else {
// Both indexes have more than one option, so we'd need more than one
// comparison. Give up.
return;
}
// Excellent, we can optimize here! Emit a select.
auto checkGlobal = values[0].globals[0];
// Compute the left and right values before the next line, as the order
// of their execution matters (they may note globals for un-nesting).
auto* left = getReadValue(values[0]);
auto* right = getReadValue(values[1]);
// Note that we must trap on null, so add a ref.as_non_null here. As
// before, the get cannot have synchronized with anything.
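        // The final emitted pattern is (a sketch):
        //
        //  (select
        //   (left)
        //   (right)
        //   (ref.eq
        //    (ref.as_non_null (..ref..))
        //    (global.get $checkGlobal)))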
Expression* getGlobal =
builder.makeGlobalGet(checkGlobal, wasm.getGlobal(checkGlobal)->type);
replaceCurrent(builder.makeSelect(
builder.makeRefEq(builder.makeRefAs(RefAsNonNull, curr->ref),
getGlobal),
left,
right));
}
void visitFunction(Function* func) {
if (refinalize) {
ReFinalize().walkFunctionInModule(func, getModule());
}
}
};
    // Find the optimization opportunities in parallel.
ModuleUtils::ParallelFunctionAnalysis<GlobalsToUnnest> optimization(
*module, [&](Function* func, GlobalsToUnnest& globalsToUnnest) {
if (func->imported()) {
return;
}
FunctionOptimizer optimizer(*this, globalsToUnnest);
optimizer.walkFunctionInModule(func, module);
});
// Un-nest any globals as needed, using the deterministic order of the
// functions in the module.
Builder builder(*module);
auto addedGlobals = false;
for (auto& func : module->functions) {
      // Each work item here is a global whose init is a struct.new, of which
      // a particular global.get wants to read the operand at a particular
      // index.
for (auto& [globalName, index, get] : optimization.map[func.get()]) {
auto* global = module->getGlobal(globalName);
auto* structNew = global->init->cast<StructNew>();
assert(index < structNew->operands.size());
auto*& operand = structNew->operands[index];
// If we already un-nested this then we don't need to repeat that work.
if (auto* nestedGet = operand->dynCast<GlobalGet>()) {
// We already un-nested, and this global.get refers to the new global.
// Simply copy the target.
get->name = nestedGet->name;
assert(get->type == nestedGet->type);
} else {
// Add a new global, initialized to the operand.
auto newName = Names::getValidGlobalName(
*module,
global->name.toString() + ".unnested." + std::to_string(index));
module->addGlobal(builder.makeGlobal(
newName, get->type, operand, Builder::Immutable));
// Replace the operand with a get of that new global, and update the
// original get to read the same.
operand = builder.makeGlobalGet(newName, get->type);
get->name = newName;
addedGlobals = true;
}
}
}
if (addedGlobals) {
// Sort the globals so that added ones appear before their uses.
PassRunner runner(module);
runner.add("reorder-globals-always");
runner.setIsNested(true);
runner.run();
}
}
};
} // anonymous namespace
Pass* createGlobalStructInferencePass() { return new GlobalStructInference(); }
} // namespace wasm