| /* |
| * Copyright 2018 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // |
| // Operations on Stack IR. |
| // |
| |
| #include "ir/branch-utils.h" |
| #include "ir/iteration.h" |
| #include "ir/local-graph.h" |
| #include "pass.h" |
| #include "wasm-stack.h" |
| #include "wasm.h" |
| |
| namespace wasm { |
| |
| StackIROptimizer::StackIROptimizer(Function* func, |
| StackIR& insts, |
| const PassOptions& passOptions, |
| FeatureSet features) |
| : func(func), insts(insts), passOptions(passOptions), features(features) {} |
| |
| void StackIROptimizer::run() { |
| dce(); |
| // FIXME: local2Stack is currently rather slow (due to localGraph), |
| // so for now run it only when really optimizing |
| if (passOptions.optimizeLevel >= 3 || passOptions.shrinkLevel >= 1) { |
| local2Stack(); |
| } |
| removeUnneededBlocks(); |
| dce(); |
| vacuum(); |
| } |
| |
| void StackIROptimizer::dce() { |
| // Remove code after an unreachable instruction: anything after it, up to the |
| // next control flow barrier, can simply be removed. |
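| // For example, in the linear format: |
| // |
| //   unreachable |
| //   i32.const 1  ;; removed: unreachable code |
| //   drop         ;; removed: unreachable code |
| //   end          ;; a barrier: code after here may execute |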
| bool inUnreachableCode = false; |
| for (Index i = 0; i < insts.size(); i++) { |
| auto* inst = insts[i]; |
| if (!inst) { |
| continue; |
| } |
| if (inUnreachableCode) { |
| // Does the unreachable code end here? |
| if (isControlFlowBarrier(inst)) { |
| inUnreachableCode = false; |
| } else { |
| // We can remove this. |
| removeAt(i); |
| } |
| } else if (inst->type == Type::unreachable) { |
| inUnreachableCode = true; |
| } |
| } |
| |
| // Remove code before an Unreachable. Consider this: |
| // |
| // (drop |
| // .. |
| // ) |
| // (unreachable) |
| // |
| // The drop is not needed, as the unreachable puts the stack in the |
| // polymorphic state anyhow. Note that we don't need to optimize anything |
| // other than a drop here, as in general the Binaryen IR DCE pass will handle |
| // everything else. A drop followed by an unreachable is the only thing that |
| // pass cannot handle, as the structured form of Binaryen IR does not allow |
| // removing such a drop, and so we can only do it here in StackIR. |
| // |
| // TODO: We can look even further back, say if there is another drop of |
| // something before, then we can remove that drop as well. To do that |
| // we'd need to inspect the stack going backwards. |
| for (Index i = 1; i < insts.size(); i++) { |
| auto* inst = insts[i]; |
| if (!inst || inst->op != StackInst::Basic || |
| !inst->origin->is<Unreachable>()) { |
| continue; |
| } |
| |
| // Look back past nulls. |
| Index j = i - 1; |
| while (j > 0 && !insts[j]) { |
| j--; |
| } |
| |
| auto*& prev = insts[j]; |
| if (prev && prev->op == StackInst::Basic && prev->origin->is<Drop>()) { |
| prev = nullptr; |
| } |
| } |
| } |
| |
| // Remove obviously-unneeded code. |
| void StackIROptimizer::vacuum() { |
| // In the wasm binary format a nop is never needed. (In Binaryen IR, in |
| // comparison, a nop may be necessary, e.g. to fill an otherwise-empty |
| // function body or if arm.) |
| // |
| // It is especially important to remove nops because we add nops when we |
| // read wasm into Binaryen IR. That is, this avoids a potential increase in |
| // code size. |
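| // For example, an if with an empty arm is represented in Binaryen IR as |
| // something like |
| // |
| //   (if (..condition..) |
| //     (then (nop))) |
| // |
| // and when writing the binary the nop can simply be skipped, leaving the |
| // arm empty. |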
| for (Index i = 0; i < insts.size(); i++) { |
| auto*& inst = insts[i]; |
| if (inst && inst->origin->is<Nop>()) { |
| inst = nullptr; |
| } |
| } |
| } |
| |
| // If things line up properly, we can avoid a local.set/local.get pair |
| // and use the value directly from the stack. For example: |
| // [..produce a value on the stack..] |
| // local.set $x |
| // [..much code..] |
| // local.get $x |
| // call $foo ;; use the value, foo(value) |
| // As long as the code in between does not modify $x, and contains |
| // no control flow branching out, we can remove both the set |
| // and the get. |
| void StackIROptimizer::local2Stack() { |
| // We use the localGraph to tell us if a get-set pair is indeed a set that is |
| // read by that get, and only that get. Note that we run this on Binaryen IR, |
| // so we are assuming that no previous opt has changed the interaction of |
| // local operations. |
| // |
| // We use a lazy graph here as we only query in the rare case when we find a |
| // set/get pair that looks optimizable. |
| LazyLocalGraph localGraph(func); |
| // The binary writing of StringWTF16Get and StringSliceWTF is optimized to use |
| // fewer scratch locals when their operands are already LocalGets. To avoid |
| // interfering with that optimization, we have to avoid removing such |
| // LocalGets. |
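| // For example, in something like |
| // |
| //   local.get $ref |
| //   local.get $pos |
| //   stringview_wtf16.get_codeunit |
| // |
| // the writer can re-emit those gets as it lowers the instruction, rather |
| // than spilling to scratch locals, so we must not stackify them away here. |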
| auto deferredGets = findStringViewDeferredGets(); |
| |
| // We maintain a stack of relevant values. This contains: |
| // * a null for each actual value that the value stack would have |
| // * an index of each LocalSet that *could* be on the value |
| // stack at that location. |
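| // For example, say a local.set is at index 5 in |insts|: |
| // |
| //   i32.const 10   ;; pushes an actual value     => values: [null] |
| //   local.set $x   ;; consumes it; the set may |
| //                  ;; be optimizable             => values: [5] |
| // |
| // If a later local.get $x turns out to use that set's value directly, we |
| // null out both instructions, and the 5 becomes a null, as the value then |
| // simply remains on the stack. |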
| const Index null = -1; |
| std::vector<Index> values; |
| // We also maintain a stack of values vectors for control flow, |
| // saving the stack as we enter and restoring it when we exit. |
| std::vector<std::vector<Index>> savedValues; |
| #ifdef STACK_OPT_DEBUG |
| std::cout << "func: " << func->name << '\n' << insts << '\n'; |
| #endif |
| for (Index instIndex = 0; instIndex < insts.size(); instIndex++) { |
| auto* inst = insts[instIndex]; |
| if (!inst) { |
| continue; |
| } |
| // First, consume values from the stack as required. |
| auto consumed = getNumConsumedValues(inst); |
| #ifdef STACK_OPT_DEBUG |
| std::cout << " " << instIndex << " : " << *inst << ", " << values.size() |
| << " on stack, will consume " << consumed << "\n "; |
| for (auto s : values) |
| std::cout << s << ' '; |
| std::cout << '\n'; |
| #endif |
| // TODO: currently we run dce before this, but if we didn't, we'd need |
| //       to handle unreachable code here - in such code it is ok to pop |
| //       values even when the stack is empty. |
| while (consumed > 0) { |
| assert(values.size() > 0); |
| // Whenever we hit a possible stack value, kill it - it would |
| // be consumed here, so we can never optimize to it. |
| while (values.back() != null) { |
| values.pop_back(); |
| assert(values.size() > 0); |
| } |
| // Finally, consume the actual value that is consumed here. |
| values.pop_back(); |
| consumed--; |
| } |
| // After consuming, we can see what to do with this. First, handle |
| // control flow. |
| if (isControlFlowBegin(inst)) { |
| // Save the stack for when we end this control flow. |
| savedValues.push_back(values); // TODO: optimize copies |
| values.clear(); |
| } else if (isControlFlowEnd(inst)) { |
| assert(!savedValues.empty()); |
| values = savedValues.back(); |
| savedValues.pop_back(); |
| } else if (isControlFlow(inst)) { |
| // Otherwise, we are in the middle of control flow (like the else of an |
| // if), so conservatively clear the stack. |
| values.clear(); |
| } |
| // If this instruction pushes a concrete value, see what to do with it. |
| if (inst->type.isConcrete()) { |
| bool optimized = false; |
| // Do not optimize multivalue locals, since those will be better |
| // optimized when they are visited in the binary writer, and this |
| // optimization would interfere with that one. |
| if (auto* get = inst->origin->dynCast<LocalGet>(); |
| get && inst->type.isSingle() && !deferredGets.count(get)) { |
| // Use another local to clarify what instIndex means in this scope. |
| auto getIndex = instIndex; |
| |
| // This is a potential optimization opportunity! See if we |
| // can reach the set. |
| if (values.size() > 0) { |
| Index j = values.size() - 1; |
| while (1) { |
| // If there's an actual value in the way, we've failed. |
| auto setIndex = values[j]; |
| if (setIndex == null) { |
| break; |
| } |
| auto* set = insts[setIndex]->origin->cast<LocalSet>(); |
| if (set->index == get->index) { |
| // This might be a proper set-get pair, where the set is |
| // used by this get and nothing else; check that. |
| auto& sets = localGraph.getSets(get); |
| if (sets.size() == 1 && *sets.begin() == set) { |
| auto& setInfluences = localGraph.getSetInfluences(set); |
| // If the set influences exactly this one get, also do the |
| // potentially-expensive check of whether we can remove this pair at |
| // all. |
| if (setInfluences.size() == 1 && |
| canRemoveSetGetPair(setIndex, getIndex)) { |
| assert(*setInfluences.begin() == get); |
| // Do it! The set and the get can go away, the proper |
| // value is on the stack. |
| #ifdef STACK_OPT_DEBUG |
| std::cout << " stackify the get\n"; |
| #endif |
| insts[setIndex] = nullptr; |
| insts[getIndex] = nullptr; |
| // Continuing on from here, replace this on the stack |
| // with a null, representing a regular value. We |
| // keep possible values above us active - they may |
| // be optimized later, as they would be pushed after |
| // us, and used before us, so there is no conflict. |
| values[j] = null; |
| optimized = true; |
| break; |
| } |
| } |
| } |
| // We failed here. Can we look some more? |
| if (j == 0) { |
| break; |
| } |
| j--; |
| } |
| } |
| } |
| if (!optimized) { |
| // This is an actual regular value on the value stack. |
| values.push_back(null); |
| } |
| } else if (inst->origin->is<LocalSet>() && inst->type == Type::none) { |
| // This set is potentially optimizable later; add it to the stack. |
| values.push_back(instIndex); |
| } |
| } |
| } |
| |
| // There may be unnecessary blocks we can remove: blocks without arriving |
| // branches are always ok to remove. |
| // TODO: A branch to a block in an if body can become a branch to that if body. |
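| // For example, when nothing branches to $b below, both StackIR |
| // instructions for the block are removed (the begin and the end share the |
| // same Block origin): |
| // |
| //   block $b   ;; removed |
| //     .. |
| //   end        ;; removed |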
| void StackIROptimizer::removeUnneededBlocks() { |
| // First, find all branch targets. |
| std::unordered_set<Name> targets; |
| for (auto*& inst : insts) { |
| if (inst) { |
| BranchUtils::operateOnScopeNameUses( |
| inst->origin, [&](Name& name) { targets.insert(name); }); |
| } |
| } |
| |
| // Remove untargeted blocks. |
| for (auto*& inst : insts) { |
| if (!inst) { |
| continue; |
| } |
| if (auto* block = inst->origin->dynCast<Block>()) { |
| if (!block->name.is() || !targets.count(block->name)) { |
| // TODO optimize, maybe run remove-unused-names |
| inst = nullptr; |
| } |
| } |
| } |
| } |
| |
| // A control flow "barrier" - a point where stack machine unreachability |
| // ends. For example, the else of an if is a barrier: even if the true arm |
| // is unreachable, code after the else may execute. |
| bool StackIROptimizer::isControlFlowBarrier(StackInst* inst) { |
| switch (inst->op) { |
| case StackInst::BlockEnd: |
| case StackInst::IfElse: |
| case StackInst::IfEnd: |
| case StackInst::LoopEnd: |
| case StackInst::Catch: |
| case StackInst::CatchAll: |
| case StackInst::Delegate: |
| case StackInst::TryEnd: |
| case StackInst::TryTableEnd: { |
| return true; |
| } |
| default: { |
| return false; |
| } |
| } |
| } |
| |
| // A control flow beginning. |
| bool StackIROptimizer::isControlFlowBegin(StackInst* inst) { |
| switch (inst->op) { |
| case StackInst::BlockBegin: |
| case StackInst::IfBegin: |
| case StackInst::LoopBegin: |
| case StackInst::TryBegin: |
| case StackInst::TryTableBegin: { |
| return true; |
| } |
| default: { |
| return false; |
| } |
| } |
| } |
| |
| // A control flow ending. |
| bool StackIROptimizer::isControlFlowEnd(StackInst* inst) { |
| switch (inst->op) { |
| case StackInst::BlockEnd: |
| case StackInst::IfEnd: |
| case StackInst::LoopEnd: |
| case StackInst::TryEnd: |
| case StackInst::Delegate: |
| case StackInst::TryTableEnd: { |
| return true; |
| } |
| default: { |
| return false; |
| } |
| } |
| } |
| |
| bool StackIROptimizer::isControlFlow(StackInst* inst) { |
| return inst->op != StackInst::Basic; |
| } |
| |
| // Remove the instruction at index i. If the instruction |
| // is control flow, and so has been expanded to multiple |
| // instructions, remove them as well. |
| void StackIROptimizer::removeAt(Index i) { |
| auto* inst = insts[i]; |
| insts[i] = nullptr; |
| if (inst->op == StackInst::Basic) { |
| return; // that was it |
| } |
| auto* origin = inst->origin; |
| while (1) { |
| i++; |
| assert(i < insts.size()); |
| inst = insts[i]; |
| insts[i] = nullptr; |
| if (inst && inst->origin == origin && isControlFlowEnd(inst)) { |
| return; // that's it, we removed it all |
| } |
| } |
| } |
| |
| Index StackIROptimizer::getNumConsumedValues(StackInst* inst) { |
| if (isControlFlow(inst)) { |
| // An if consumes one value (the condition); no other control flow |
| // structure consumes values as it begins. |
| if (inst->op == StackInst::IfBegin) { |
| return 1; |
| } |
| return 0; |
| } |
| // Otherwise, for basic instructions, just count the expression children. |
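| // For example, an i32.add has two children and so consumes two values, |
| // while a drop has one child and consumes one. |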
| return ChildIterator(inst->origin).children.size(); |
| } |
| |
| // Given a pair of a local.set and local.get, see if we can remove them |
| // without breaking validation. Specifically, a set of a non-nullable |
| // local must be kept if a later get depends on it in order to validate, |
| // such as here: |
| // |
| // local.set 0 ;; Can we remove |
| // local.get 0 ;; this pair? |
| // if |
| // local.set 0 |
| // else |
| // local.set 0 |
| // end |
| // local.get 0 ;; This get poses a problem. |
| // |
| // Logically the second and third sets ensure a value is assigned to the |
| // local before we read it, but the validation rules only track each set |
| // until the end of its scope, so the first set (before the if, in the |
| // pair) is necessary. |
| // |
| // The logic below is related to LocalStructuralDominance, but sharing code |
| // with it is difficult as this uses StackIR and not BinaryenIR, and it checks |
| // a particular set/get pair. |
| // |
| // We are given the indexes of the set and get instructions in |insts|. |
| bool StackIROptimizer::canRemoveSetGetPair(Index setIndex, Index getIndex) { |
| // The set must be before the get. |
| assert(setIndex < getIndex); |
| |
| auto* set = insts[setIndex]->origin->cast<LocalSet>(); |
| auto localType = func->getLocalType(set->index); |
| // Note we do not need to handle tuples here, as the parent ignores them |
| // anyhow (hence we can check non-nullability instead of non- |
| // defaultability). |
| assert(localType.isSingle()); |
| if (func->isParam(set->index) || !localType.isNonNullable()) { |
| // This local cannot pose a problem for validation (params are always |
| // initialized, and it is ok if nullable locals are uninitialized). |
| return true; |
| } |
| |
| // Track the depth (in block/if/loop/etc. scopes) relative to our starting |
| // point. Anything less deep than that is not interesting, as we can only |
| // help things at our depth or deeper to validate. |
| Index currDepth = 0; |
| |
| // Look for a get other than the one at getIndex (since that one is being |
| // removed) which would stop validating without the set. While doing so, |
| // note other sets that ensure validation even if our set is removed. We |
| // track those in this stack of booleans, one for each scope; an entry is |
| // true if another set covers us there and ours is not needed. |
| // |
| // We begin in the current scope and with no other set covering us. |
| std::vector<bool> coverStack = {false}; |
| |
| // Track the total number of covers as well, for quick checking below. |
| Index covers = 0; |
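| // For example, suppose the pair being removed is the first two |
| // instructions here: |
| // |
| //   local.set 0   ;; the set of the pair |
| //   local.get 0   ;; the get of the pair |
| //   local.set 0   ;; a cover at our depth |
| //   local.get 0   ;; validates even without the pair, thanks to the cover |
| // |
| // Once we see the covering set at our depth, nothing later can be a |
| // problem, and we return true below. |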
| |
| // TODO: We could look before us as well, but then we might end up scanning |
| // much of the function every time. |
| for (Index i = setIndex + 1; i < insts.size(); i++) { |
| auto* inst = insts[i]; |
| if (!inst) { |
| continue; |
| } |
| if (isControlFlowBegin(inst)) { |
| // A new scope begins. |
| currDepth++; |
| coverStack.push_back(false); |
| } else if (isControlFlowEnd(inst)) { |
| if (currDepth == 0) { |
| // Less deep than the start, so we found no problem. |
| return true; |
| } |
| currDepth--; |
| |
| if (coverStack.back()) { |
| // A cover existed in the scope which ended. |
| covers--; |
| } |
| coverStack.pop_back(); |
| } else if (isControlFlowBarrier(inst)) { |
| // A barrier, like the else in an if-else, not only ends a scope but |
| // opens a new one. |
| if (currDepth == 0) { |
| // Another scope with the same depth begins, but ours ended, so stop. |
| return true; |
| } |
| |
| if (coverStack.back()) { |
| // A cover existed in the scope which ended. |
| covers--; |
| } |
| coverStack.back() = false; |
| } else if (auto* otherSet = inst->origin->dynCast<LocalSet>()) { |
| // If this sets the same local, we are covered in this scope |
| // henceforth. |
| if (otherSet->index == set->index) { |
| if (!coverStack.back()) { |
| covers++; |
| if (currDepth == 0) { |
| // We have a cover at depth 0, so everything from here on out |
| // will be covered. |
| return true; |
| } |
| coverStack.back() = true; |
| } |
| } |
| } else if (auto* otherGet = inst->origin->dynCast<LocalGet>()) { |
| if (otherGet->index == set->index && i != getIndex && !covers) { |
| // We found a get that might be a problem: it uses the same index, but |
| // is not the get we were told about, and no other set covers us. |
| return false; |
| } |
| } |
| } |
| |
| // No problem. |
| return true; |
| } |
| |
| std::unordered_set<LocalGet*> StackIROptimizer::findStringViewDeferredGets() { |
| std::unordered_set<LocalGet*> deferred; |
| auto note = [&](Expression* e) { |
| if (auto* get = e->dynCast<LocalGet>()) { |
| deferred.insert(get); |
| } |
| }; |
| for (auto* inst : insts) { |
| if (!inst) { |
| continue; |
| } |
| if (auto* curr = inst->origin->dynCast<StringWTF16Get>()) { |
| note(curr->pos); |
| } else if (auto* curr = inst->origin->dynCast<StringSliceWTF>()) { |
| note(curr->start); |
| note(curr->end); |
| } |
| } |
| return deferred; |
| } |
| |
| } // namespace wasm |