| /* |
| * Copyright 2017 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // |
| // Local CSE |
| // |
| // This requires --flatten to be run before in order to be effective, |
| // and preserves flatness. The reason flatness is required is that |
| // this pass assumes everything is stored in a local, and all it does |
| // is alter local.sets to do local.gets of an existing value when |
| // possible, replacing a recomputing of that value. That design means that |
| // if there are block and if return values, nested expressions not stored |
| // to a local, etc., then it can't operate on them (and will just not |
| // do anything for them). |
| // |
| // In each linear area of execution, |
| // * track each relevant (big enough) expression |
| // * if already seen, write to a local if not already, and reuse |
| // * invalidate the list as we see effects |
| // |
| // TODO: global, inter-block gvn etc. |
| // |
| |
| #include <algorithm> |
| #include <memory> |
| |
| #include "ir/flat.h" |
| #include <ir/cost.h> |
| #include <ir/effects.h> |
| #include <ir/equivalent_sets.h> |
| #include <ir/hashed.h> |
| #include <pass.h> |
| #include <wasm-builder.h> |
| #include <wasm-traversal.h> |
| #include <wasm.h> |
| |
| namespace wasm { |
| |
| struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { |
| bool isFunctionParallel() override { return true; } |
| |
| Pass* create() override { return new LocalCSE(); } |
| |
| struct Usable { |
| HashedExpression hashed; |
| Type localType; |
| Usable(HashedExpression hashed, Type localType) |
| : hashed(hashed), localType(localType) {} |
| }; |
| |
| struct UsableHasher { |
| HashType operator()(const Usable value) const { |
| return rehash(value.hashed.hash, value.localType.getID()); |
| } |
| }; |
| |
| struct UsableComparer { |
| bool operator()(const Usable a, const Usable b) const { |
| if (a.hashed.hash != b.hashed.hash || a.localType != b.localType) { |
| return false; |
| } |
| return ExpressionAnalyzer::equal(a.hashed.expr, b.hashed.expr); |
| } |
| }; |
| |
| // information for an expression we can reuse |
| struct UsableInfo { |
| Expression* value; // the value we can reuse |
| Index index; // the local we are assigned to, local.get that to reuse us |
| EffectAnalyzer effects; |
| |
| UsableInfo(Expression* value, |
| Index index, |
| PassOptions& passOptions, |
| FeatureSet features) |
| : value(value), index(index), effects(passOptions, features, value) {} |
| }; |
| |
| // a list of usables in a linear execution trace |
| class Usables |
| : public std:: |
| unordered_map<Usable, UsableInfo, UsableHasher, UsableComparer> {}; |
| |
| // locals in current linear execution trace, which we try to sink |
| Usables usables; |
| |
| // We track locals containing the same value - the value is what matters, not |
| // the index. |
| EquivalentSets equivalences; |
| |
| bool anotherPass; |
| |
| void doWalkFunction(Function* func) { |
| Flat::verifyFlatness(func); |
| anotherPass = true; |
| // we may need multiple rounds |
| while (anotherPass) { |
| anotherPass = false; |
| clear(); |
| super::doWalkFunction(func); |
| } |
| } |
| |
| static void doNoteNonLinear(LocalCSE* self, Expression** currp) { |
| self->clear(); |
| } |
| |
| void clear() { |
| usables.clear(); |
| equivalences.clear(); |
| } |
| |
| // Checks invalidations due to a set of effects. Also optionally receive |
| // an expression that was just post-visited, and that also needs to be |
| // taken into account. |
| void checkInvalidations(EffectAnalyzer& effects, Expression* curr = nullptr) { |
| // TODO: this is O(bad) |
| std::vector<Usable> invalidated; |
| for (auto& sinkable : usables) { |
| // Check invalidations of the values we may want to use. |
| if (effects.invalidates(sinkable.second.effects)) { |
| invalidated.push_back(sinkable.first); |
| } |
| } |
| if (curr) { |
| // If we are a set, we have more to check: each of the usable |
| // values was from a set, and we did not consider the set in |
| // the loop above - just the values. So here we must check that |
| // sets do not interfere. (Note that due to flattening we |
| // have no risk of tees etc.) |
| if (auto* set = curr->dynCast<LocalSet>()) { |
| for (auto& sinkable : usables) { |
| // Check if the index is the same. Make sure to ignore |
| // our own value, which we may have just added! |
| if (sinkable.second.index == set->index && |
| sinkable.second.value != set->value) { |
| invalidated.push_back(sinkable.first); |
| } |
| } |
| } |
| } |
| for (auto index : invalidated) { |
| usables.erase(index); |
| } |
| } |
| |
| std::vector<Expression*> expressionStack; |
| |
| static void visitPre(LocalCSE* self, Expression** currp) { |
| // pre operations |
| Expression* curr = *currp; |
| |
| EffectAnalyzer effects(self->getPassOptions(), self->getModule()->features); |
| if (effects.checkPre(curr)) { |
| self->checkInvalidations(effects); |
| } |
| |
| self->expressionStack.push_back(curr); |
| } |
| |
| static void visitPost(LocalCSE* self, Expression** currp) { |
| auto* curr = *currp; |
| |
| // main operations |
| self->handle(curr); |
| |
| // post operations |
| |
| EffectAnalyzer effects(self->getPassOptions(), self->getModule()->features); |
| if (effects.checkPost(curr)) { |
| self->checkInvalidations(effects, curr); |
| } |
| |
| self->expressionStack.pop_back(); |
| } |
| |
| // override scan to add a pre and a post check task to all nodes |
| static void scan(LocalCSE* self, Expression** currp) { |
| self->pushTask(visitPost, currp); |
| |
| super::scan(self, currp); |
| |
| self->pushTask(visitPre, currp); |
| } |
| |
| void handle(Expression* curr) { |
| if (auto* set = curr->dynCast<LocalSet>()) { |
| // Calculate equivalences |
| auto* func = getFunction(); |
| equivalences.reset(set->index); |
| if (auto* get = set->value->dynCast<LocalGet>()) { |
| if (func->getLocalType(set->index) == func->getLocalType(get->index)) { |
| equivalences.add(set->index, get->index); |
| } |
| } |
| // consider the value |
| auto* value = set->value; |
| if (isRelevant(value)) { |
| Usable usable(value, func->getLocalType(set->index)); |
| auto iter = usables.find(usable); |
| if (iter != usables.end()) { |
| // already exists in the table, this is good to reuse |
| auto& info = iter->second; |
| Type localType = func->getLocalType(info.index); |
| set->value = |
| Builder(*getModule()).makeLocalGet(info.index, localType); |
| anotherPass = true; |
| } else { |
| // not in table, add this, maybe we can help others later |
| usables.emplace(std::make_pair( |
| usable, |
| UsableInfo( |
| value, set->index, getPassOptions(), getModule()->features))); |
| } |
| } |
| } else if (auto* get = curr->dynCast<LocalGet>()) { |
| if (auto* set = equivalences.getEquivalents(get->index)) { |
| // Canonicalize to the lowest index. This lets hashing and comparisons |
| // "just work". |
| get->index = *std::min_element(set->begin(), set->end()); |
| } |
| } |
| } |
| |
| // A relevant value is a non-trivial one, something we may want to reuse |
| // and are able to. |
| bool isRelevant(Expression* value) { |
| if (value->is<LocalGet>()) { |
| return false; // trivial, this is what we optimize to! |
| } |
| if (!value->type.isConcrete()) { |
| return false; // don't bother with unreachable etc. |
| } |
| if (EffectAnalyzer(getPassOptions(), getModule()->features, value) |
| .hasSideEffects()) { |
| return false; // we can't combine things with side effects |
| } |
| auto& options = getPassRunner()->options; |
| // If the size is at least 3, then if we have two of them we have 6, |
| // and so adding one set+two gets and removing one of the items itself |
| // is not detrimental, and may be beneficial. |
| if (options.shrinkLevel > 0 && Measurer::measure(value) >= 3) { |
| return true; |
| } |
| // If we focus on speed, any reduction in cost is beneficial, as the |
| // cost of a get is essentially free. |
| if (options.shrinkLevel == 0 && CostAnalyzer(value).cost > 0) { |
| return true; |
| } |
| return false; |
| } |
| }; |
| |
| Pass* createLocalCSEPass() { return new LocalCSE(); } |
| |
| } // namespace wasm |