| /* |
| * Copyright 2022 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // |
| // Finds types which are only created in assignments to immutable globals. For |
| // such types we can replace a struct.get with a global.get when there is a |
| // single possible global, or if there are two then with this pattern: |
| // |
| // (struct.get $foo i |
| // (..ref..)) |
| // => |
| // (select |
| // (value1) |
| // (value2) |
| // (ref.eq |
| // (..ref..) |
| // (global.get $global1))) |
| // |
| // That is a valid transformation if there are only two struct.news of $foo, it |
| // is created in two immutable globals $global1 and $global2, the field is |
| // immutable, the values of field |i| in them are value1 and value2 |
| // respectively, and $foo has no subtypes. In that situation, the reference must |
| // be one of those two, so we can compare the reference to the globals and pick |
| // the right value there. (We can also handle subtypes, if we look at their |
| // values as well, see below.) |
| // |
| // The benefit of this optimization is primarily in the case of constant values |
| // that we can heavily optimize, like function references (constant function |
| // refs let us inline, etc.). Function references cannot be directly compared, |
| // so we cannot use ConstantFieldPropagation or such with an extension to |
| // multiple values, as the select pattern shown above can't be used - it needs a |
| // comparison. But we can compare structs, so if the function references are in |
| // vtables, and the vtables follow the above pattern, then we can optimize. |
| // |
| // TODO: Only do the case with a select when shrinkLevel == 0? |
| // |
| |
| #include <variant> |
| |
| #include "ir/bits.h" |
| #include "ir/debuginfo.h" |
| #include "ir/find_all.h" |
| #include "ir/module-utils.h" |
| #include "ir/names.h" |
| #include "ir/possible-constant.h" |
| #include "ir/subtypes.h" |
| #include "ir/utils.h" |
| #include "pass.h" |
| #include "wasm-builder.h" |
| #include "wasm.h" |
| |
| namespace wasm { |
| |
| namespace { |
| |
| struct GlobalStructInference : public Pass { |
| // Only modifies struct.get operations. |
| bool requiresNonNullableLocalFixups() override { return false; } |
| |
| // Maps optimizable struct types to the globals whose init is a struct.new of |
| // them. |
| // |
| // We will remove unoptimizable types from here, so in practice, if a type is |
| // optimizable it will have an entry here, and not if not. |
| std::unordered_map<HeapType, std::vector<Name>> typeGlobals; |
| |
| void run(Module* module) override { |
| if (!module->features.hasGC()) { |
| return; |
| } |
| |
| if (!getPassOptions().closedWorld) { |
| Fatal() << "GSI requires --closed-world"; |
| } |
| |
| // First, find all the information we need. We need to know which struct |
| // types are created in functions, because we will not be able to optimize |
| // those. |
| |
| using HeapTypes = std::unordered_set<HeapType>; |
| |
| ModuleUtils::ParallelFunctionAnalysis<HeapTypes> analysis( |
| *module, [&](Function* func, HeapTypes& types) { |
| if (func->imported()) { |
| return; |
| } |
| |
| for (auto* structNew : FindAll<StructNew>(func->body).list) { |
| auto type = structNew->type; |
| if (type.isRef()) { |
| types.insert(type.getHeapType()); |
| } |
| } |
| }); |
| |
| // We cannot optimize types that appear in a struct.new in a function, which |
| // we just collected and merge now. |
| HeapTypes unoptimizable; |
| |
| for (auto& [func, types] : analysis.map) { |
| for (auto type : types) { |
| unoptimizable.insert(type); |
| } |
| } |
| |
| // Process the globals. |
| for (auto& global : module->globals) { |
| if (global->imported()) { |
| continue; |
| } |
| |
| // We cannot optimize a type that appears in a non-toplevel location in a |
| // global init. |
| for (auto* structNew : FindAll<StructNew>(global->init).list) { |
| auto type = structNew->type; |
| if (type.isRef() && structNew != global->init) { |
| unoptimizable.insert(type.getHeapType()); |
| } |
| } |
| |
| if (!global->init->type.isRef()) { |
| continue; |
| } |
| |
| auto type = global->init->type.getHeapType(); |
| |
| // The global's declared type must match the init's type. If not, say if |
| // we had a global declared as type |any| but that contains (ref $A), then |
| // that is not something we can optimize, as ref.eq on a global.get of |
| // that global will not validate. (This should not be a problem after |
| // GlobalSubtyping runs, which will specialize the type of the global.) |
| if (global->type != global->init->type) { |
| unoptimizable.insert(type); |
| continue; |
| } |
| |
| // We cannot optimize mutable globals. |
| if (global->mutable_) { |
| unoptimizable.insert(type); |
| continue; |
| } |
| |
| // Finally, if this is a struct.new then it is one we can optimize; note |
| // it. |
| if (global->init->is<StructNew>()) { |
| typeGlobals[type].push_back(global->name); |
| } |
| } |
| |
| // A struct.get might also read from any of the subtypes. As a result, an |
| // unoptimizable type makes all its supertypes unoptimizable as well. |
| // TODO: this could be specific per field (and not all supers have all |
| // fields) |
| // Iterate on a copy to avoid invalidation as we insert. |
| auto unoptimizableCopy = unoptimizable; |
| for (auto type : unoptimizableCopy) { |
| while (1) { |
| unoptimizable.insert(type); |
| |
| // Also erase the globals, as we will never read them anyhow. This can |
| // allow us to skip unneeded work, when we check if typeGlobals is |
| // empty, below. |
| typeGlobals.erase(type); |
| |
| auto super = type.getDeclaredSuperType(); |
| if (!super) { |
| break; |
| } |
| type = *super; |
| } |
| } |
| |
| // Similarly, propagate global names: if one type has [global1], then a get |
| // of any supertype might access that, so propagate to them. |
| auto typeGlobalsCopy = typeGlobals; |
| for (auto& [type, globals] : typeGlobalsCopy) { |
| auto curr = type; |
| while (1) { |
| auto super = curr.getDeclaredSuperType(); |
| if (!super) { |
| break; |
| } |
| curr = *super; |
| |
| // As above, avoid adding pointless data for anything unoptimizable. |
| if (!unoptimizable.count(curr)) { |
| for (auto global : globals) { |
| typeGlobals[curr].push_back(global); |
| } |
| } |
| } |
| } |
| |
| if (typeGlobals.empty()) { |
| // We found nothing we can optimize. |
| return; |
| } |
| |
| // The above loop on typeGlobalsCopy is on an unsorted data structure, and |
| // that can lead to nondeterminism in typeGlobals. Sort the vectors there to |
| // ensure determinism. |
| for (auto& [type, globals] : typeGlobals) { |
| std::sort(globals.begin(), globals.end()); |
| } |
| |
| // We are looking for the case where we can pick between two values using a |
| // single comparison. More than two values, or more than a single |
| // comparison, lead to tradeoffs that may not be worth it. |
| // |
| // Note that situation may involve more than two globals. For example we may |
| // have three relevant globals, but two may have the same value. In that |
| // case we can compare against the third: |
| // |
| // $global0: (struct.new $Type (i32.const 42)) |
| // $global1: (struct.new $Type (i32.const 42)) |
| // $global2: (struct.new $Type (i32.const 1337)) |
| // |
| // (struct.get $Type (ref)) |
| // => |
| // (select |
| // (i32.const 1337) |
| // (i32.const 42) |
| // (ref.eq (ref) $global2)) |
| // |
| // To discover these situations, we compute and group the possible values |
| // that can be read from a particular struct.get, using the following data |
| // structure. |
| struct Value { |
| // A value is either a constant, or if not, then we point to whatever |
| // expression it is. |
| std::variant<PossibleConstantValues, Expression*> content; |
| // The list of globals that have this Value. In the example from above, |
| // the Value for 42 would list globals = [$global0, $global1]. |
| // TODO: SmallVector? |
| std::vector<Name> globals; |
| |
| bool isConstant() const { |
| return std::get_if<PossibleConstantValues>(&content); |
| } |
| |
| const PossibleConstantValues& getConstant() const { |
| assert(isConstant()); |
| return std::get<PossibleConstantValues>(content); |
| } |
| |
| Expression* getExpression() const { |
| assert(!isConstant()); |
| return std::get<Expression*>(content); |
| } |
| }; |
| |
| // Constant expressions are easy to handle, and we can emit a select as in |
| // the last example. But we can handle non-constant ones too, by un-nesting |
| // the relevant global. Imagine we have this: |
| // |
| // (global $g (struct.new $S |
| // (struct.new $T ..) |
| // |
| // We have a nested struct.new here. That is not a constant value, but we |
| // can turn it into a global.get: |
| // |
| // (global $g.nested (struct.new $T ..) |
| // (global $g (struct.new $S |
| // (global.get $g.nested) |
| // |
| // After this un-nesting we end up with a global.get of an immutable global, |
| // which is constant. Note that this adds a global and may increase code |
| // size slightly, but if it lets us infer constant values that may lead to |
| // devirtualization and other large benefits. Later passes can also re-nest. |
| // |
| // We do most of our optimization work in parallel, but we cannot add |
| // globals in parallel, so instead we note the places we need to un-nest in |
| // this data structure and process them at the end. |
| struct GlobalToUnnest { |
| // The global we want to refer to a nested part of, by un-nesting it. The |
| // global contains a struct.new, and we want to refer to one of the |
| // operands of the struct.new directly, which we can do by moving it out |
| // to its own new global. |
| Name global; |
| // The index of the struct.new in the global named |global|. |
| Index index; |
| // The global.get that should refer to the new global. At the end, after |
| // we create a new global and have a name for it, we update this get to |
| // point to it. |
| GlobalGet* get; |
| }; |
| using GlobalsToUnnest = std::vector<GlobalToUnnest>; |
| |
| struct FunctionOptimizer : PostWalker<FunctionOptimizer> { |
| private: |
| GlobalStructInference& parent; |
| GlobalsToUnnest& globalsToUnnest; |
| |
| public: |
| FunctionOptimizer(GlobalStructInference& parent, |
| GlobalsToUnnest& globalsToUnnest) |
| : parent(parent), globalsToUnnest(globalsToUnnest) {} |
| |
| bool refinalize = false; |
| |
| void visitStructGet(StructGet* curr) { |
| auto type = curr->ref->type; |
| if (type == Type::unreachable) { |
| return; |
| } |
| |
| // We must ignore the case of a non-struct heap type, that is, a bottom |
| // type (which is all that is left after we've already ruled out |
| // unreachable). Such things will not be in typeGlobals, which we are |
| // checking now anyhow. |
| auto heapType = type.getHeapType(); |
| auto iter = parent.typeGlobals.find(heapType); |
| if (iter == parent.typeGlobals.end()) { |
| return; |
| } |
| |
| // This cannot be a bottom type as we found it in the typeGlobals map, |
| // which only contains types of struct.news. |
| assert(heapType.isStruct()); |
| |
| // The field must be immutable. |
| auto fieldIndex = curr->index; |
| auto& field = heapType.getStruct().fields[fieldIndex]; |
| if (field.mutable_ == Mutable) { |
| return; |
| } |
| |
| const auto& globals = iter->second; |
| if (globals.size() == 0) { |
| return; |
| } |
| |
| auto& wasm = *getModule(); |
| Builder builder(wasm); |
| |
| if (globals.size() == 1) { |
| // Leave it to other passes to infer the constant value of the field, |
| // if there is one: just change the reference to the global, which |
| // will unlock those other optimizations. Note we must trap if the ref |
| // is null, so add RefAsNonNull here. |
| auto global = globals[0]; |
| auto globalType = wasm.getGlobal(global)->type; |
| if (globalType != curr->ref->type) { |
| // The struct.get will now read from something of the type of the |
| // global, which is different, so the field being read might be |
| // refined, which could change the struct.get's type. |
| refinalize = true; |
| } |
| // No need to worry about atomic gets here. We will still read from |
| // the same memory location as before and preserve all side effects |
| // (including synchronization) that were previously present. The |
| // memory location is immutable anyway, so there cannot be any writes |
| // to synchronize with in the first place. |
| curr->ref = builder.makeSequence( |
| builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)), |
| builder.makeGlobalGet(global, globalType)); |
| return; |
| } |
| |
| // TODO: SmallVector? |
| std::vector<Value> values; |
| |
| // Scan the relevant struct.new operands. |
| auto fieldType = field.type; |
| for (Index i = 0; i < globals.size(); i++) { |
| Name global = globals[i]; |
| auto* structNew = wasm.getGlobal(global)->init->cast<StructNew>(); |
| // The value that is read from this struct.new. |
| Value value; |
| |
| // Find the value read from the struct and represent it as a Value. |
| PossibleConstantValues constant; |
| if (structNew->isWithDefault()) { |
| constant.note(Literal::makeZero(fieldType)); |
| value.content = constant; |
| } else { |
| Expression* operand = structNew->operands[fieldIndex]; |
| constant.note(operand, wasm); |
| if (constant.isConstant()) { |
| value.content = constant; |
| } else { |
| value.content = operand; |
| } |
| } |
| |
| // If the value is constant, it may be grouped as mentioned before. |
| // See if it matches anything we've seen before. |
| bool grouped = false; |
| if (value.isConstant()) { |
| for (auto& oldValue : values) { |
| if (oldValue.isConstant() && |
| oldValue.getConstant() == value.getConstant()) { |
| // Add us to this group. |
| oldValue.globals.push_back(global); |
| grouped = true; |
| break; |
| } |
| } |
| } |
| if (!grouped) { |
| // This is a new value, so create a new group, unless we've seen too |
| // many unique values. In that case, give up. |
| if (values.size() == 2) { |
| return; |
| } |
| value.globals.push_back(global); |
| values.push_back(value); |
| } |
| } |
| |
| // Helper for optimization: Given a Value, returns what we should read |
| // for it. |
| auto getReadValue = [&](const Value& value) -> Expression* { |
| Expression* ret; |
| if (value.isConstant()) { |
| // This is known to be a constant, so simply emit an expression for |
| // that constant, and handle if the field is packed. |
| auto* expr = value.getConstant().makeExpression(wasm); |
| ret = Bits::makePackedFieldGet(expr, field, curr->signed_, wasm); |
| } else { |
| // Otherwise, this is non-constant, so we are in the situation where |
| // we want to un-nest the value out of the struct.new it is in. Note |
| // that for later work, as we cannot add a global in parallel. |
| |
| // There can only be one global in a value that is not constant, |
| // which is the global we want to read from. |
| assert(value.globals.size() == 1); |
| |
| // Create a global.get with temporary name, leaving only the |
| // updating of the name to later work. |
| auto* get = builder.makeGlobalGet(value.globals[0], |
| value.getExpression()->type); |
| |
| globalsToUnnest.emplace_back( |
| GlobalToUnnest{value.globals[0], fieldIndex, get}); |
| |
| ret = get; |
| } |
| |
| // This value replaces the struct.get, so it should have the same |
| // source location. |
| debuginfo::copyOriginalToReplacement(curr, ret, getFunction()); |
| |
| return ret; |
| }; |
| |
| // We have some globals (at least 2), and so must have at least one |
| // value. And we have already exited if we have more than 2 values (see |
| // the early return above) so that only leaves 1 and 2. |
| if (values.size() == 1) { |
| // The case of 1 value is simple: trap if the ref is null, and |
| // otherwise return the value. Since the field is immutable, there |
| // cannot have been any writes to it we must synchonize with, so we do |
| // not need a fence. |
| replaceCurrent(builder.makeSequence( |
| builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)), |
| getReadValue(values[0]))); |
| return; |
| } |
| assert(values.size() == 2); |
| |
| // We have two values. Check that we can pick between them using a |
| // single comparison. While doing so, ensure that the index we can check |
| // on is 0, that is, the first value has a single global. |
| if (values[0].globals.size() == 1) { |
| // The checked global is already in index 0. |
| } else if (values[1].globals.size() == 1) { |
| // Flip so the value to check is in index 0. |
| std::swap(values[0], values[1]); |
| } else { |
| // Both indexes have more than one option, so we'd need more than one |
| // comparison. Give up. |
| return; |
| } |
| |
| // Excellent, we can optimize here! Emit a select. |
| |
| auto checkGlobal = values[0].globals[0]; |
| // Compute the left and right values before the next line, as the order |
| // of their execution matters (they may note globals for un-nesting). |
| auto* left = getReadValue(values[0]); |
| auto* right = getReadValue(values[1]); |
| // Note that we must trap on null, so add a ref.as_non_null here. As |
| // before, the get cannot have synchronized with anything. |
| Expression* getGlobal = |
| builder.makeGlobalGet(checkGlobal, wasm.getGlobal(checkGlobal)->type); |
| replaceCurrent(builder.makeSelect( |
| builder.makeRefEq(builder.makeRefAs(RefAsNonNull, curr->ref), |
| getGlobal), |
| left, |
| right)); |
| } |
| |
| void visitFunction(Function* func) { |
| if (refinalize) { |
| ReFinalize().walkFunctionInModule(func, getModule()); |
| } |
| } |
| }; |
| |
| // Find the optimization opportunitites in parallel. |
| ModuleUtils::ParallelFunctionAnalysis<GlobalsToUnnest> optimization( |
| *module, [&](Function* func, GlobalsToUnnest& globalsToUnnest) { |
| if (func->imported()) { |
| return; |
| } |
| |
| FunctionOptimizer optimizer(*this, globalsToUnnest); |
| optimizer.walkFunctionInModule(func, module); |
| }); |
| |
| // Un-nest any globals as needed, using the deterministic order of the |
| // functions in the module. |
| Builder builder(*module); |
| auto addedGlobals = false; |
| for (auto& func : module->functions) { |
| // Each work item here is a global with a struct.new, from which we want |
| // to read a particular index, from a particular global.get. |
| for (auto& [globalName, index, get] : optimization.map[func.get()]) { |
| auto* global = module->getGlobal(globalName); |
| auto* structNew = global->init->cast<StructNew>(); |
| assert(index < structNew->operands.size()); |
| auto*& operand = structNew->operands[index]; |
| |
| // If we already un-nested this then we don't need to repeat that work. |
| if (auto* nestedGet = operand->dynCast<GlobalGet>()) { |
| // We already un-nested, and this global.get refers to the new global. |
| // Simply copy the target. |
| get->name = nestedGet->name; |
| assert(get->type == nestedGet->type); |
| } else { |
| // Add a new global, initialized to the operand. |
| auto newName = Names::getValidGlobalName( |
| *module, |
| global->name.toString() + ".unnested." + std::to_string(index)); |
| module->addGlobal(builder.makeGlobal( |
| newName, get->type, operand, Builder::Immutable)); |
| // Replace the operand with a get of that new global, and update the |
| // original get to read the same. |
| operand = builder.makeGlobalGet(newName, get->type); |
| get->name = newName; |
| addedGlobals = true; |
| } |
| } |
| } |
| |
| if (addedGlobals) { |
| // Sort the globals so that added ones appear before their uses. |
| PassRunner runner(module); |
| runner.add("reorder-globals-always"); |
| runner.setIsNested(true); |
| runner.run(); |
| } |
| } |
| }; |
| |
| } // anonymous namespace |
| |
| Pass* createGlobalStructInferencePass() { return new GlobalStructInference(); } |
| |
| } // namespace wasm |