| /* |
| * Copyright 2023 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #define UNSUBTYPING_DEBUG 0 |
| |
| #include <cstddef> |
| |
| #if !UNSUBTYPING_DEBUG |
| #include <unordered_map> |
| #include <unordered_set> |
| #endif |
| |
| #include "ir/subtype-exprs.h" |
| #include "ir/type-updating.h" |
| #include "ir/utils.h" |
| #include "pass.h" |
| #include "support/index.h" |
| #include "wasm-traversal.h" |
| #include "wasm-type.h" |
| #include "wasm.h" |
| |
| #if UNSUBTYPING_DEBUG |
| #include "support/insert_ordered.h" |
| #endif |
| |
| // Compute and use the minimal subtype relation required to maintain module |
| // validity and behavior. This minimal relation will be a subset of the original |
| // subtype relation. Start by walking the IR and collecting pairs of types that |
| // need to be in the subtype relation for each expression to validate. For |
| // example, a local.set requires that the type of its operand be a subtype of |
| // the local's type. Casts do not generate subtypings at this point because it |
| // is not necessary for the cast target to be a subtype of the cast source for |
| // the cast to validate. |
| // |
| // From that initial subtype relation, we then start finding new subtypings that |
| // are required by the subtypings we have found already. These transitively |
| // required subtypings come from two sources. |
| // |
| // The first source is type definitions. Consider these type definitions: |
| // |
| // (type $A (sub (struct (ref $X)))) |
| // (type $B (sub $A (struct (ref $Y)))) |
| // |
| // If we have determined that $B must remain a subtype of $A, then we know that |
| // $Y must remain a subtype of $X as well, since the type definitions would not |
| // be valid otherwise. Similarly, knowing that $X must remain a subtype of $Y |
| // may transitively require other subtypings as well based on their type |
| // definitions. |
| // |
| // The second source of transitive subtyping requirements is casts. Although |
| // casting from one type to another does not necessarily require that those |
| // types are related, we do need to make sure that we do not change the |
| // behavior of casts by removing subtype relationships they might observe. For |
| // example, consider this module: |
| // |
| // (module |
| // ;; original subtyping: $bot <: $mid <: $top |
| // (type $top (sub (struct))) |
| // (type $mid (sub $top (struct))) |
| // (type $bot (sub $mid (struct))) |
| // |
| // (func $f |
| // (local $top (ref $top)) |
| // (local $mid (ref $mid)) |
| // |
| // ;; Requires $bot <: $top |
| // (local.set $top (struct.new $bot)) |
| // |
| // ;; Cast $top to $mid |
| // (local.set $mid (ref.cast (ref $mid) (local.get $top))) |
| // ) |
| // ) |
| // |
| // The only subtype relation directly required by the IR for this module is $bot |
| // <: $top. However, if we optimized the module so that $bot <: $top was the |
| // only subtype relation, we would change the behavior of the cast. In the |
| // original module, a value of type (ref $bot) is cast to (ref $mid). The cast |
| // succeeds because in the original module, $bot <: $mid. If we optimize so that |
| // we have $bot <: $top and no other subtypings, though, the cast will fail |
| // because the value of type (ref $bot) no longer inhabits (ref $mid). To |
| // prevent the cast's behavior from changing, we need to ensure that $bot <: |
| // $mid. |
| // |
| // The set of subtyping requirements generated by a cast from $src to $dest is |
| // that for every known remaining subtype $v of $src, if $v <: $dest in the |
| // original module, then $v <: $dest in the optimized module. In other words, |
| // for every type $v of values we know can flow into the cast, if the cast would |
| // have succeeded for values of type $v before, then we know the cast must |
| // continue to succeed for values of type $v. These requirements arising from |
| // casts can also generate transitive requirements because we learn about new |
| // types of values that can flow into casts as we learn about new subtypes of |
| // cast sources. |
| // |
| // Starting with the initial subtype relation determined by walking the IR, |
| // repeatedly search for new subtypings by analyzing type definitions and casts |
| // until we reach a fixed point. This is the minimal subtype relation that |
| // preserves module validity and behavior that can be found without a more |
| // precise analysis of types that might flow into each cast. |
| |
| namespace wasm { |
| |
| namespace { |
| |
| #if UNSUBTYPING_DEBUG |
| template<typename K, typename V> using Map = InsertOrderedMap<K, V>; |
| template<typename T> using Set = InsertOrderedSet<T>; |
| #else |
| template<typename K, typename V> using Map = std::unordered_map<K, V>; |
| template<typename T> using Set = std::unordered_set<T>; |
| #endif |
| // A tree (or rather a forest) of types with the ability to query and set |
| // supertypes in constant time and efficiently iterate over supertypes and |
| // subtypes. |
| struct TypeTree { |
| struct Node { |
| // The type represented by this node. |
| HeapType type; |
| // The index of the parent (supertype) in the list of nodes. Set to the |
| // index of this node if there is no parent. |
| Index parent; |
| // The index of this node in the parent's list of children, if any, enabling |
| // O(1) updates. |
| Index indexInParent = 0; |
| // The indices of the children (subtypes) in the list of nodes. |
| std::vector<Index> children; |
| |
| Node(HeapType type, Index index) : type(type), parent(index) {} |
| }; |
| |
| std::vector<Node> nodes; |
| Map<HeapType, Index> indices; |
| |
| void setSupertype(HeapType sub, HeapType super) { |
| auto childIndex = getIndex(sub); |
| auto parentIndex = getIndex(super); |
| auto& childNode = nodes[childIndex]; |
| auto& parentNode = nodes[parentIndex]; |
| // Remove sub from its old supertype if necessary. |
| if (auto oldParentIndex = childNode.parent; oldParentIndex != childIndex) { |
| auto& oldParentNode = nodes[oldParentIndex]; |
| // Move sub to the back of its parent's children and then pop it. |
| auto& children = oldParentNode.children; |
| assert(children[childNode.indexInParent] == childIndex); |
| auto& swappedNode = nodes[children.back()]; |
| assert(swappedNode.indexInParent == children.size() - 1); |
| // Swap the indices in the parent's child vector. |
| std::swap(children[childNode.indexInParent], children.back()); |
| // Swap the index in the kept child. |
| swappedNode.indexInParent = childNode.indexInParent; |
| children.pop_back(); |
| } |
| childNode.parent = parentIndex; |
| childNode.indexInParent = parentNode.children.size(); |
| parentNode.children.push_back(childIndex); |
| } |
| |
| std::optional<HeapType> getSupertype(HeapType type) { |
| auto index = getIndex(type); |
| auto parentIndex = nodes[index].parent; |
| if (parentIndex == index) { |
| return std::nullopt; |
| } |
| return nodes[parentIndex].type; |
| } |
| |
| struct SupertypeIterator { |
| using value_type = const HeapType; |
| using difference_type = std::ptrdiff_t; |
| using reference = const HeapType&; |
| using pointer = const HeapType*; |
| using iterator_category = std::input_iterator_tag; |
| |
| TypeTree* parent; |
| std::optional<Index> index; |
| |
| bool operator==(const SupertypeIterator& other) { |
| return index == other.index; |
| } |
| bool operator!=(const SupertypeIterator& other) { |
| return !(*this == other); |
| } |
| const HeapType& operator*() const { return parent->nodes[*index].type; } |
| const HeapType* operator->() const { return &*(*this); } |
| SupertypeIterator& operator++() { |
| auto parentIndex = parent->nodes[*index].parent; |
| if (parentIndex == *index) { |
| index = std::nullopt; |
| } else { |
| index = parentIndex; |
| } |
| return *this; |
| } |
| SupertypeIterator operator++(int) { |
| auto it = *this; |
| ++(*this); |
| return it; |
| } |
| }; |
| |
| struct Supertypes { |
| TypeTree* parent; |
| Index index; |
| SupertypeIterator begin() { return {parent, index}; } |
| SupertypeIterator end() { return {parent, std::nullopt}; } |
| }; |
| |
| Supertypes supertypes(HeapType type) { return {this, getIndex(type)}; } |
| |
| struct SubtypeIterator { |
| using value_type = const HeapType; |
| using difference_type = std::ptrdiff_t; |
| using reference = const HeapType&; |
| using pointer = const HeapType*; |
| using iterator_category = std::input_iterator_tag; |
| |
| TypeTree* parent; |
| |
| // DFS stack of (node index, child index) pairs. |
| std::vector<std::pair<Index, Index>> stack; |
| |
| bool operator==(const SubtypeIterator& other) { |
| return stack == other.stack; |
| } |
| bool operator!=(const SubtypeIterator& other) { return !(*this == other); } |
| const HeapType& operator*() const { |
| return parent->nodes[stack.back().first].type; |
| } |
| const HeapType* operator->() const { return &*(*this); } |
| SubtypeIterator& operator++() { |
| while (true) { |
| if (stack.empty()) { |
| return *this; |
| } |
| auto& [index, childIndex] = stack.back(); |
| auto& children = parent->nodes[index].children; |
| if (childIndex == children.size()) { |
| stack.pop_back(); |
| } else { |
| auto child = children[childIndex++]; |
| stack.push_back({child, 0u}); |
| return *this; |
| } |
| } |
| } |
| SubtypeIterator operator++(int) { |
| auto it = *this; |
| ++(*this); |
| return it; |
| } |
| }; |
| |
| struct Subtypes { |
| TypeTree* parent; |
| Index index; |
| SubtypeIterator begin() { return {parent, {std::make_pair(index, 0u)}}; } |
| SubtypeIterator end() { return {parent, {}}; } |
| }; |
| |
| Subtypes subtypes(HeapType type) { return {this, getIndex(type)}; } |
| |
| private: |
| Index getIndex(HeapType type) { |
| auto [it, inserted] = indices.insert({type, nodes.size()}); |
| if (inserted) { |
| nodes.emplace_back(type, nodes.size()); |
| } |
| return it->second; |
| } |
| }; |
| |
| struct Unsubtyping : Pass { |
| // (sub, super) pairs that we have discovered but not yet processed. |
| std::vector<std::pair<HeapType, HeapType>> work; |
| |
| // Record the type tree with supertype and subtype relations in such a way |
| // that we can add new supertype relationships in constant time. |
| TypeTree types; |
| |
| // Map from cast source types to their destinations. |
| Map<HeapType, std::vector<HeapType>> casts; |
| |
| void run(Module* wasm) override { |
| if (!wasm->features.hasGC()) { |
| return; |
| } |
| |
| // Initialize the subtype relation based on what is immediately required to |
| // keep the code and public types valid. |
| analyzePublicTypes(*wasm); |
| analyzeModule(*wasm); |
| |
| // Find further subtypings and iterate to a fixed point. |
| while (!work.empty()) { |
| auto [sub, super] = work.back(); |
| work.pop_back(); |
| process(sub, super); |
| } |
| |
| rewriteTypes(*wasm); |
| |
| // Cast types may be refinable if their source and target types are no |
| // longer related. TODO: Experiment with running this only after checking |
| // whether it is necessary. |
| ReFinalize().run(getPassRunner(), wasm); |
| } |
| |
| void noteSubtype(HeapType sub, HeapType super) { |
| // Bottom types are uninteresting, but other basic heap types can be |
| // interesting because of their interactions with casts. |
| if (sub == super || sub.isBottom()) { |
| return; |
| } |
| work.push_back({sub, super}); |
| } |
| |
| void noteSubtype(Type sub, Type super) { |
| if (sub.isTuple()) { |
| assert(super.isTuple() && sub.size() == super.size()); |
| for (size_t i = 0, size = sub.size(); i < size; ++i) { |
| noteSubtype(sub[i], super[i]); |
| } |
| return; |
| } |
| if (!sub.isRef() || !super.isRef()) { |
| return; |
| } |
| noteSubtype(sub.getHeapType(), super.getHeapType()); |
| } |
| |
| void analyzePublicTypes(Module& wasm) { |
| // We cannot change supertypes for anything public. |
| for (auto type : ModuleUtils::getPublicHeapTypes(wasm)) { |
| if (auto super = type.getDeclaredSuperType()) { |
| noteSubtype(type, *super); |
| } |
| } |
| } |
| |
| void analyzeModule(Module& wasm) { |
| struct Info { |
| // (source, target) pairs for casts. |
| Set<std::pair<HeapType, HeapType>> casts; |
| |
| // Observed (sub, super) subtype constraints. |
| Set<std::pair<HeapType, HeapType>> subtypings; |
| }; |
| |
| struct Collector |
| : ControlFlowWalker<Collector, SubtypingDiscoverer<Collector>> { |
| Info& info; |
| Collector(Info& info) : info(info) {} |
| void noteSubtype(Type sub, Type super) { |
| if (sub.isTuple()) { |
| assert(super.isTuple() && sub.size() == super.size()); |
| for (size_t i = 0, size = sub.size(); i < size; ++i) { |
| noteSubtype(sub[i], super[i]); |
| } |
| return; |
| } |
| if (!sub.isRef() || !super.isRef()) { |
| return; |
| } |
| noteSubtype(sub.getHeapType(), super.getHeapType()); |
| } |
| void noteSubtype(HeapType sub, HeapType super) { |
| assert(HeapType::isSubType(sub, super)); |
| if (sub == super || sub.isBottom()) { |
| return; |
| } |
| info.subtypings.insert({sub, super}); |
| } |
| void noteSubtype(Type sub, Expression* super) { |
| noteSubtype(sub, super->type); |
| } |
| void noteSubtype(Expression* sub, Type super) { |
| noteSubtype(sub->type, super); |
| } |
| void noteSubtype(Expression* sub, Expression* super) { |
| noteSubtype(sub->type, super->type); |
| } |
| void noteNonFlowSubtype(Expression* sub, Type super) { |
| // This expression's type must be a subtype of |super|, but the value |
| // does not flow anywhere - this is a static constraint. As the value |
| // does not flow, it cannot reach anywhere else, which means we need |
| // this in order to validate but it does not interact with casts. Given |
| // that, if super is a basic type then we can simply ignore this: we |
| // only remove subtyping between user types, so subtyping wrt basic |
| // types is unchanged, and so this constraint will never be a problem. |
| // |
| // This is sort of a hack because in general to be precise we should not |
| // just consider basic types here - in general, we should note for each |
| // constraint whether it is a flow-based one or not, and only take the |
| // flow-based ones into account when looking at the impact of casts. |
| // However, in practice this is enough as the only non-trivial case of |
| // |noteNonFlowSubtype| is for RefEq, which uses a basic type (eqref). |
| // Other cases of non-flow subtyping end up trivial, e.g., the target of |
| // a CallRef is compared to itself (and we ignore constraints of A :> |
| // A). However, if we change how |noteNonFlowSubtype| is used in |
| // SubtypingDiscoverer then we may need to generalize this. |
| if (super.isRef() && super.getHeapType().isBasic()) { |
| return; |
| } |
| |
| // Otherwise, we must take this into account. |
| noteSubtype(sub, super); |
| } |
| void noteCast(HeapType src, HeapType dst) { |
| // Casts to self and casts that must fail because they have incompatible |
| // types are uninteresting. |
| if (dst == src) { |
| return; |
| } |
| if (HeapType::isSubType(dst, src)) { |
| info.casts.insert({src, dst}); |
| return; |
| } |
| if (HeapType::isSubType(src, dst)) { |
| // This is an upcast that will always succeed, but only if we ensure |
| // src <: dst. |
| info.subtypings.insert({src, dst}); |
| } |
| } |
| void noteCast(Expression* src, Type dst) { |
| if (src->type.isRef() && dst.isRef()) { |
| noteCast(src->type.getHeapType(), dst.getHeapType()); |
| } |
| } |
| void noteCast(Expression* src, Expression* dst) { |
| if (src->type.isRef() && dst->type.isRef()) { |
| noteCast(src->type.getHeapType(), dst->type.getHeapType()); |
| } |
| } |
| }; |
| |
| // Collect subtyping constraints and casts from functions in parallel. |
| ModuleUtils::ParallelFunctionAnalysis<Info> analysis( |
| wasm, [&](Function* func, Info& info) { |
| if (!func->imported()) { |
| Collector(info).walkFunctionInModule(func, &wasm); |
| } |
| }); |
| |
| Info collectedInfo; |
| for (auto& [_, info] : analysis.map) { |
| collectedInfo.casts.insert(info.casts.begin(), info.casts.end()); |
| collectedInfo.subtypings.insert(info.subtypings.begin(), |
| info.subtypings.end()); |
| } |
| |
| // Collect constraints from module-level code as well. |
| Collector collector(collectedInfo); |
| collector.walkModuleCode(&wasm); |
| collector.setModule(&wasm); |
| for (auto& global : wasm.globals) { |
| collector.visitGlobal(global.get()); |
| } |
| for (auto& segment : wasm.elementSegments) { |
| collector.visitElementSegment(segment.get()); |
| } |
| |
| // Prepare the collected information for the upcoming processing loop. |
| for (auto& [sub, super] : collectedInfo.subtypings) { |
| noteSubtype(sub, super); |
| } |
| for (auto [src, dst] : collectedInfo.casts) { |
| casts[src].push_back(dst); |
| } |
| } |
| |
| void process(HeapType sub, HeapType super) { |
| assert(HeapType::isSubType(sub, super)); |
| auto oldSuper = types.getSupertype(sub); |
| if (oldSuper) { |
| // We already had a recorded supertype. The new supertype might be |
| // deeper,shallower, or equal to the old supertype. We must recursively |
| // note the relationship between the old and new supertypes. |
| if (super == *oldSuper) { |
| // Nothing new to do here. |
| return; |
| } |
| if (HeapType::isSubType(*oldSuper, super)) { |
| // sub <: oldSuper <: super |
| processDescribed(sub, *oldSuper, super); |
| noteSubtype(*oldSuper, super); |
| // We already handled sub <: oldSuper, so we're done. |
| return; |
| } |
| // sub <: super <: oldSuper |
| // Eagerly process super <: oldSuper first. This ensures that sub and |
| // super will already be in the same tree when we process them below, so |
| // when we process casts we will know that we only need to process up to |
| // oldSuper. |
| processDescribed(sub, super, *oldSuper); |
| process(super, *oldSuper); |
| } |
| |
| types.setSupertype(sub, super); |
| |
| // We have a new supertype. Find the implied subtypings from the type |
| // definitions and casts. |
| processDefinitions(sub, super); |
| processCasts(sub, super, oldSuper); |
| } |
| |
| void processDescribed(HeapType sub, HeapType mid, HeapType super) { |
| // We are establishing sub <: mid <: super. If super describes the immediate |
| // supertype of the type sub describes, then once we insert mid between them |
| // we would have this: |
| // |
| // A -> super |
| // ^ ^ |
| // | mid |
| // | ^ |
| // C -> sub |
| // |
| // This violates the requirement that the descriptor of C's immediate |
| // supertype must be the immediate supertype of C's descriptor. To fix it, |
| // we have to find the type B that mid describes and insert it between A and |
| // C: |
| // |
| // A -> super |
| // ^ ^ |
| // B -> mid |
| // ^ ^ |
| // C -> sub |
| // |
| // We do this eagerly before we establish sub <: mid <: super so that if |
| // establishing that subtyping requires recursively establishing other |
| // subtypings, we can depend on the invariant that the described types are |
| // always set up correctly beforehand. |
| auto subDescribed = sub.getDescribedType(); |
| auto superDescribed = super.getDescribedType(); |
| if (subDescribed && superDescribed && |
| types.getSupertype(*subDescribed) == superDescribed) { |
| auto midDescribed = mid.getDescribedType(); |
| assert(midDescribed); |
| process(*subDescribed, *midDescribed); |
| } |
| } |
| |
| void processDefinitions(HeapType sub, HeapType super) { |
| if (super.isBasic()) { |
| return; |
| } |
| switch (sub.getKind()) { |
| case HeapTypeKind::Func: { |
| auto sig = sub.getSignature(); |
| auto superSig = super.getSignature(); |
| noteSubtype(superSig.params, sig.params); |
| noteSubtype(sig.results, superSig.results); |
| break; |
| } |
| case HeapTypeKind::Struct: { |
| const auto& fields = sub.getStruct().fields; |
| const auto& superFields = super.getStruct().fields; |
| for (size_t i = 0, size = superFields.size(); i < size; ++i) { |
| noteSubtype(fields[i].type, superFields[i].type); |
| } |
| break; |
| } |
| case HeapTypeKind::Array: { |
| auto elem = sub.getArray().element; |
| noteSubtype(elem.type, super.getArray().element.type); |
| break; |
| } |
| case HeapTypeKind::Cont: |
| WASM_UNREACHABLE("TODO: cont"); |
| case HeapTypeKind::Basic: |
| WASM_UNREACHABLE("unexpected kind"); |
| } |
| if (auto desc = sub.getDescriptorType()) { |
| if (auto superDesc = super.getDescriptorType()) { |
| noteSubtype(*desc, *superDesc); |
| } |
| } |
| } |
| |
| void |
| processCasts(HeapType sub, HeapType super, std::optional<HeapType> oldSuper) { |
| // We are either attaching the one tree rooted at `sub` under a new |
| // supertype in another tree, or we are reparenting `sub` below a |
| // descendent of `oldSuper` in the same tree. In the former case, we must |
| // evaluate `sub` and all its subtypes against all its new supertypes and |
| // their cast destinations. In the latter case, `sub` and all its subtypes |
| // must have already been evaluated against `oldSuper` and its supertypes, |
| // so we only need to additionally evaluate them against supertypes up to |
| // `oldSuper`. |
| for (auto type : types.subtypes(sub)) { |
| for (auto src : types.supertypes(super)) { |
| if (oldSuper && src == *oldSuper) { |
| break; |
| } |
| for (auto dst : casts[src]) { |
| if (HeapType::isSubType(type, dst)) { |
| noteSubtype(type, dst); |
| } |
| } |
| } |
| } |
| } |
| |
| void rewriteTypes(Module& wasm) { |
| struct Rewriter : GlobalTypeRewriter { |
| Unsubtyping& parent; |
| Rewriter(Unsubtyping& parent, Module& wasm) |
| : GlobalTypeRewriter(wasm), parent(parent) {} |
| std::optional<HeapType> getDeclaredSuperType(HeapType type) override { |
| if (auto super = parent.types.getSupertype(type); |
| super && !super->isBasic()) { |
| return *super; |
| } |
| return std::nullopt; |
| } |
| }; |
| Rewriter(*this, wasm).update(); |
| } |
| }; |
| |
| } // anonymous namespace |
| |
| Pass* createUnsubtypingPass() { return new Unsubtyping(); } |
| |
| } // namespace wasm |