| /* |
| * Copyright 2023 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "ir/names.h" |
| #include "ir/stack-utils.h" |
| #include "ir/utils.h" |
| #include "pass.h" |
| #include "passes/stringify-walker.h" |
| #include "support/intervals.h" |
| #include "support/suffix_tree.h" |
| #include "wasm-ir-builder.h" |
| #include "wasm.h" |
| |
| #define OUTLINING_DEBUG 0 |
| |
| #if OUTLINING_DEBUG |
| #define ODBG(statement) statement |
| #else |
| #define ODBG(statement) |
| #endif |
| |
| // Check a Result or MaybeResult for error and call Fatal() if the error exists. |
| #define ASSERT_OK(val) \ |
| if (auto _val = (val); auto err = _val.getErr()) { \ |
| Fatal() << err->msg; \ |
| } |
| |
| namespace wasm { |
| |
| // This custom hasher conforms to std::hash<Key>. Its purpose is to provide |
| // a custom hash for if expressions, so the if-condition of the if expression is |
| // not included in the hash for the if expression. This is needed because in the |
| // binary format, the if-condition comes before and is consumed by the if. To |
| // match the binary format, we hash the if condition before and separately from |
| // the rest of the if expression. |
| struct StringifyHasher { |
| size_t operator()(Expression* curr) const { |
| if (Properties::isControlFlowStructure(curr)) { |
| if (auto* iff = curr->dynCast<If>()) { |
| size_t digest = wasm::hash(iff->_id); |
| rehash(digest, ExpressionAnalyzer::hash(iff->ifTrue)); |
| if (iff->ifFalse) { |
| rehash(digest, ExpressionAnalyzer::hash(iff->ifFalse)); |
| } |
| return digest; |
| } |
| |
| return ExpressionAnalyzer::hash(curr); |
| } |
| |
| return ExpressionAnalyzer::shallowHash(curr); |
| } |
| }; |
| |
| // This custom equator conforms to std::equal_to<Key>. Similar to |
| // StringifyHasher, it's purpose is to not include the if-condition when |
| // evaluating the equality of two if expressions. |
| struct StringifyEquator { |
| bool operator()(Expression* lhs, Expression* rhs) const { |
| if (Properties::isControlFlowStructure(lhs) && |
| Properties::isControlFlowStructure(rhs)) { |
| auto* iffl = lhs->dynCast<If>(); |
| auto* iffr = rhs->dynCast<If>(); |
| |
| if (iffl && iffr) { |
| return ExpressionAnalyzer::equal(iffl->ifTrue, iffr->ifTrue) && |
| ExpressionAnalyzer::equal(iffl->ifFalse, iffr->ifFalse); |
| } |
| |
| return ExpressionAnalyzer::equal(lhs, rhs); |
| } |
| |
| return ExpressionAnalyzer::shallowEqual(lhs, rhs); |
| } |
| }; |
| |
| struct HashStringifyWalker : public StringifyWalker<HashStringifyWalker> { |
| // After calling walkModule, this vector contains the result of encoding a |
| // wasm module as a string of uint32_t values. Each value represents either an |
| // Expression or a separator to mark the end of control flow. |
| std::vector<uint32_t> hashString; |
| // A monotonic counter used to ensure that unique expressions in the |
| // module are assigned a unique value in the hashString. |
| uint32_t nextVal = 0; |
| // A monotonic counter used to ensure that each separator in the |
| // module is assigned a unique value in the hashString. |
| int32_t nextSeparatorVal = -1; |
| // Contains a mapping of expression pointer to value to ensure we |
| // use the same value for matching expressions. A custom hasher and |
| // equator is provided in order to separate out evaluation of the if-condition |
| // when evaluating if expressions. |
| std::unordered_map<Expression*, uint32_t, StringifyHasher, StringifyEquator> |
| exprToCounter; |
| std::vector<Expression*> exprs; |
| |
| void addUniqueSymbol(SeparatorReason reason); |
| void visitExpression(Expression* curr); |
| // Converts the idx from relative to the beginning of the program to |
| // relative to its enclosing function, and returns the name of its function. |
| std::pair<uint32_t, Name> makeRelative(uint32_t idx) const; |
| |
| private: |
| // Contains the indices that mark the start of each function. |
| std::set<uint32_t> funcIndices; |
| // Maps the start idx of each function to the function name. |
| std::map<uint32_t, Name> idxToFuncName; |
| }; |
| |
| void HashStringifyWalker::addUniqueSymbol(SeparatorReason reason) { |
| // Use a negative value to distinguish symbols for separators from symbols |
| // for Expressions |
| assert((uint32_t)nextSeparatorVal >= nextVal); |
| if (auto funcStart = reason.getFuncStart()) { |
| idxToFuncName.insert({hashString.size(), funcStart->func->name}); |
| } |
| hashString.push_back((uint32_t)nextSeparatorVal); |
| nextSeparatorVal--; |
| exprs.push_back(nullptr); |
| } |
| |
| void HashStringifyWalker::visitExpression(Expression* curr) { |
| auto [it, inserted] = exprToCounter.insert({curr, nextVal}); |
| hashString.push_back(it->second); |
| exprs.push_back(curr); |
| if (inserted) { |
| nextVal++; |
| } |
| } |
| |
| std::pair<uint32_t, Name> |
| HashStringifyWalker::makeRelative(uint32_t idx) const { |
| // The upper_bound function returns an iterator to the first value in the set |
| // that is true for idx < value. We subtract one from this returned value to |
| // tell us which function actually contains the the idx. |
| auto [funcIdx, func] = *--idxToFuncName.upper_bound(idx); |
| return {idx - funcIdx, func}; |
| } |
| |
| using Substrings = std::vector<SuffixTree::RepeatedSubstring>; |
| |
| // Functions that filter vectors of SuffixTree::RepeatedSubstring |
| struct StringifyProcessor { |
| static Substrings repeatSubstrings(std::vector<uint32_t>& hashString); |
| static Substrings dedupe(const Substrings& substrings); |
| static Substrings filterOverlaps(const Substrings& substrings); |
| // Filter is the general purpose function backing subsequent filter functions. |
| // It can be used directly, but generally prefer a wrapper function |
| // to encapsulate your condition and make it available for tests. |
| static Substrings filter(const Substrings& substrings, |
| const std::vector<Expression*>& exprs, |
| std::function<bool(const Expression*)> condition); |
| static Substrings filterLocalSets(const Substrings& substrings, |
| const std::vector<Expression*>& exprs); |
| static Substrings filterLocalGets(const Substrings& substrings, |
| const std::vector<Expression*>& exprs); |
| static Substrings filterBranches(const Substrings& substrings, |
| const std::vector<Expression*>& exprs); |
| }; |
| |
| std::vector<SuffixTree::RepeatedSubstring> |
| StringifyProcessor::repeatSubstrings(std::vector<uint32_t>& hashString) { |
| SuffixTree st(hashString); |
| std::vector<SuffixTree::RepeatedSubstring> substrings(st.begin(), st.end()); |
| for (auto& substring : substrings) { |
| // Sort by increasing start index to ensure determinism. |
| std::sort(substring.StartIndices.begin(), substring.StartIndices.end()); |
| } |
| // Substrings are sorted so that the longest substring that repeats the most |
| // times is ordered first. This is done so that we can assume the most |
| // worthwhile substrings to outline come first. |
| std::sort( |
| substrings.begin(), |
| substrings.end(), |
| [](SuffixTree::RepeatedSubstring a, SuffixTree::RepeatedSubstring b) { |
| size_t aWeight = a.Length * a.StartIndices.size(); |
| size_t bWeight = b.Length * b.StartIndices.size(); |
| if (aWeight == bWeight) { |
| return a.StartIndices[0] < b.StartIndices[0]; |
| } |
| return aWeight > bWeight; |
| }); |
| return substrings; |
| } |
| |
| // Deduplicate substrings by iterating through the list of substrings, keeping |
| // only those whose list of end indices is disjoint from the set of end indices |
| // for all substrings kept so far. Substrings that are contained within other |
| // substrings will always share an end index with those other substrings. Note |
| // that this deduplication may be over-aggressive, since it will remove |
| // substrings that are contained within any previous substring, even if they |
| // have many other occurrences that are not inside other substrings. Part of the |
| // reason dedupe can be so aggressive is an assumption 1) that the input |
| // substrings have been sorted so that the longest substrings with the most |
| // repeats come first and 2) these are more worthwhile to keep than subsequent |
| // substrings of substrings, even if they appear more times. |
| std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::dedupe( |
| const std::vector<SuffixTree::RepeatedSubstring>& substrings) { |
| std::unordered_set<uint32_t> seen; |
| std::vector<SuffixTree::RepeatedSubstring> result; |
| for (auto substring : substrings) { |
| std::vector<uint32_t> idxToInsert; |
| bool seenEndIdx = false; |
| for (auto startIdx : substring.StartIndices) { |
| // We are using the end index to ensure that each repeated substring |
| // reported by the SuffixTree is unique. This is because LLVM's SuffixTree |
| // reports back repeat sequences that are substrings of longer repeat |
| // sequences with the same endIdx, and we generally prefer to outline |
| // longer repeat sequences. |
| uint32_t endIdx = substring.Length + startIdx; |
| if (seen.count(endIdx)) { |
| seenEndIdx = true; |
| break; |
| } |
| idxToInsert.push_back(endIdx); |
| } |
| if (!seenEndIdx) { |
| seen.insert(idxToInsert.begin(), idxToInsert.end()); |
| result.push_back(substring); |
| } |
| } |
| |
| return result; |
| } |
| |
| std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterOverlaps( |
| const std::vector<SuffixTree::RepeatedSubstring>& substrings) { |
| // A substring represents a contiguous set of instructions that appear more |
| // than once in a Wasm binary. For each appearance of the substring, an |
| // Interval is created that lacks a connection back to its originating |
| // substring. To fix, upon Interval creation, a second vector is populated |
| // with the index of the corresponding substring. |
| std::vector<Interval> intervals; |
| std::vector<int> substringIdxs; |
| |
| // Construct intervals |
| for (Index i = 0; i < substrings.size(); i++) { |
| auto& substring = substrings[i]; |
| for (auto startIdx : substring.StartIndices) { |
| intervals.emplace_back( |
| startIdx, startIdx + substring.Length, substring.Length); |
| substringIdxs.push_back(i); |
| } |
| } |
| |
| // Get the overlapping intervals |
| std::vector<SuffixTree::RepeatedSubstring> result; |
| std::vector<std::vector<Index>> startIndices(substrings.size()); |
| std::vector<int> indices = IntervalProcessor::filterOverlaps(intervals); |
| for (auto i : indices) { |
| // i is the idx of the Interval in the intervals vector |
| // i in substringIdxs returns the idx of the substring that needs to be |
| // included in result |
| auto substringIdx = substringIdxs[i]; |
| startIndices[substringIdx].push_back(intervals[i].start); |
| } |
| for (Index i = 0; i < startIndices.size(); i++) { |
| if (startIndices[i].size() > 1) { |
| result.emplace_back(SuffixTree::RepeatedSubstring( |
| {substrings[i].Length, std::move(startIndices[i])})); |
| } |
| } |
| |
| return result; |
| } |
| |
| std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filter( |
| const std::vector<SuffixTree::RepeatedSubstring>& substrings, |
| const std::vector<Expression*>& exprs, |
| std::function<bool(const Expression*)> condition) { |
| |
| struct FilterStringifyWalker : public StringifyWalker<FilterStringifyWalker> { |
| bool hasFilterValue = false; |
| std::function<bool(const Expression*)> condition; |
| |
| FilterStringifyWalker(std::function<bool(const Expression*)> condition) |
| : condition(condition){}; |
| |
| void walk(Expression* curr) { |
| hasFilterValue = false; |
| Super::walk(curr); |
| flushControlFlowQueue(); |
| } |
| |
| void addUniqueSymbol(SeparatorReason reason) {} |
| |
| void visitExpression(Expression* curr) { |
| if (condition(curr)) { |
| hasFilterValue = true; |
| } |
| } |
| }; |
| |
| FilterStringifyWalker walker(condition); |
| |
| std::vector<SuffixTree::RepeatedSubstring> result; |
| for (auto substring : substrings) { |
| bool hasFilterValue = false; |
| for (auto idx = substring.StartIndices[0], |
| endIdx = substring.StartIndices[0] + substring.Length; |
| idx < endIdx; |
| idx++) { |
| Expression* curr = exprs[idx]; |
| if (Properties::isControlFlowStructure(curr)) { |
| walker.walk(curr); |
| if (walker.hasFilterValue) { |
| hasFilterValue = true; |
| break; |
| } |
| } |
| if (condition(curr)) { |
| hasFilterValue = true; |
| break; |
| } |
| } |
| if (!hasFilterValue) { |
| result.push_back(substring); |
| } |
| } |
| |
| return result; |
| } |
| |
| std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterLocalSets( |
| const std::vector<SuffixTree::RepeatedSubstring>& substrings, |
| const std::vector<Expression*>& exprs) { |
| return StringifyProcessor::filter( |
| substrings, exprs, [](const Expression* curr) { |
| return curr->is<LocalSet>(); |
| }); |
| } |
| |
| std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterLocalGets( |
| const std::vector<SuffixTree::RepeatedSubstring>& substrings, |
| const std::vector<Expression*>& exprs) { |
| return StringifyProcessor::filter( |
| substrings, exprs, [](const Expression* curr) { |
| return curr->is<LocalGet>(); |
| }); |
| } |
| |
| std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterBranches( |
| const std::vector<SuffixTree::RepeatedSubstring>& substrings, |
| const std::vector<Expression*>& exprs) { |
| return StringifyProcessor::filter( |
| substrings, exprs, [](const Expression* curr) { |
| return Properties::isBranch(curr) || curr->is<Return>() || |
| curr->is<TryTable>(); |
| }); |
| } |
| |
| struct OutliningSequence { |
| unsigned startIdx; |
| unsigned endIdx; |
| Name func; |
| bool endsTypeUnreachable; |
| #if OUTLINING_DEBUG |
| unsigned programIdx; |
| #endif |
| |
| OutliningSequence(unsigned startIdx, |
| unsigned endIdx, |
| Name func, |
| bool endsTypeUnreachable |
| #if OUTLINING_DEBUG |
| , |
| unsigned programIdx |
| #endif |
| ) |
| : startIdx(startIdx), endIdx(endIdx), func(func), |
| endsTypeUnreachable(endsTypeUnreachable) |
| #if OUTLINING_DEBUG |
| , |
| programIdx(programIdx) |
| #endif |
| { |
| } |
| }; |
| |
| // Instances of this walker are intended to walk a function at a time, at the |
| // behest of the owner of the instance. |
| struct ReconstructStringifyWalker |
| : public StringifyWalker<ReconstructStringifyWalker> { |
| |
| ReconstructStringifyWalker(Module* wasm, Function* func) |
| : existingBuilder(*wasm), outlinedBuilder(*wasm), func(func) { |
| this->setModule(wasm); |
| ODBG(std::cerr << "\nexistingBuilder: " << &existingBuilder |
| << " outlinedBuilder: " << &outlinedBuilder << "\n"); |
| } |
| |
| // As we reconstruct the IR during outlining, we need to know what |
| // state we're in to determine which IRBuilder to send the instruction to. |
| enum ReconstructState { |
| NotInSeq = 0, // Will not be outlined into a new function. |
| InSeq = 1, // Currently being outlined into a new function. |
| InSkipSeq = 2, // A sequence that has already been outlined. |
| }; |
| // We begin with the assumption that we are not currently in a sequence that |
| // will be outlined. |
| ReconstructState state = ReconstructState::NotInSeq; |
| |
| // The list of sequences that will be outlined, contained in the function |
| // currently being walked. |
| std::vector<OutliningSequence> sequences; |
| // Tracks the OutliningSequence the walker is about to outline or is currently |
| // outlining. |
| uint32_t seqCounter = 0; |
| // Counts the number of instructions visited since the function began, |
| // corresponds to the indices in the sequences. |
| uint32_t instrCounter = 0; |
| // A reusable builder for reconstructing the function that will have sequences |
| // of instructions removed to be placed into an outlined function. The removed |
| // sequences will be replaced by a call to the outlined function. |
| IRBuilder existingBuilder; |
| // A reusable builder for constructing the outlined functions that will |
| // contain repeat sequences found in the program. |
| IRBuilder outlinedBuilder; |
| |
| // The function we are outlining from. |
| Function* func; |
| |
| void addUniqueSymbol(SeparatorReason reason) { |
| if (auto curr = reason.getFuncStart()) { |
| startExistingFunction(curr->func); |
| return; |
| } |
| |
| // instrCounter is managed manually and incremented at the beginning of |
| // addUniqueSymbol() and visitExpression(), except for the case where we are |
| // starting a new function, as that resets the counters back to 0. |
| instrCounter++; |
| |
| ODBG(std::string desc); |
| if (auto curr = reason.getBlockStart()) { |
| ODBG(desc = "Block Start at "); |
| ASSERT_OK(existingBuilder.visitBlockStart(curr->block)); |
| } else if (auto curr = reason.getIfStart()) { |
| // IR builder needs the condition of the If pushed onto the builder before |
| // visitIfStart(), which will expect to be able to pop the condition. |
| // This is always okay to do because the correct condition was installed |
| // onto the If when the outer scope was visited. |
| existingBuilder.pushSynthetic(curr->iff->condition); |
| ODBG(desc = "If Start at "); |
| ASSERT_OK(existingBuilder.visitIfStart(curr->iff)); |
| } else if (reason.getElseStart()) { |
| ODBG(desc = "Else Start at "); |
| ASSERT_OK(existingBuilder.visitElse()); |
| } else if (auto curr = reason.getLoopStart()) { |
| ODBG(desc = "Loop Start at "); |
| ASSERT_OK(existingBuilder.visitLoopStart(curr->loop)); |
| } else if (auto curr = reason.getTryStart()) { |
| // We preserve the name of the tryy because IRBuilder expects |
| // visitTryStart() to be called on an empty Try, during the normal case of |
| // parsing. TODO: Fix this. |
| auto name = curr->tryy->name; |
| ASSERT_OK(existingBuilder.visitTryStart(curr->tryy, Name())); |
| ODBG(desc = "Try Start at "); |
| curr->tryy->name = name; |
| } else if (auto curr = reason.getCatchStart()) { |
| ASSERT_OK(existingBuilder.visitCatch(curr->tag)); |
| ODBG(desc = "Catch Start at "); |
| } else if (reason.getCatchAllStart()) { |
| ASSERT_OK(existingBuilder.visitCatchAll()); |
| ODBG(desc = "Catch All Start at"); |
| } else if (auto curr = reason.getTryTableStart()) { |
| ODBG(desc = "Try Table Start at "); |
| ASSERT_OK(existingBuilder.visitTryTableStart(curr->tryt)); |
| } else if (reason.getEnd()) { |
| ODBG(desc = "End at "); |
| ASSERT_OK(existingBuilder.visitEnd()); |
| // Reset the function in case we just ended the function scope. |
| existingBuilder.setFunction(func); |
| // Outlining performs an unnested walk of the Wasm module, visiting |
| // each scope one at a time. IRBuilder, in contrast, expects to |
| // visit several nested scopes at a time. Thus, calling end() finalizes |
| // the control flow and places it on IRBuilder's internal stack, ready for |
| // the enclosing scope to consume its expressions off the stack. Since |
| // outlining walks unnested, the enclosing scope never arrives to retrieve |
| // its expressions off the stack, so we must call build() after visitEnd() |
| // to clear the internal stack IRBuilder manages. |
| ASSERT_OK(existingBuilder.build()); |
| } else { |
| ODBG(desc = "addUniqueSymbol for unimplemented control flow "); |
| WASM_UNREACHABLE("unimplemented control flow"); |
| } |
| ODBG(printAddUniqueSymbol(desc)); |
| } |
| |
| void visitExpression(Expression* curr) { |
| maybeBeginSeq(); |
| |
| IRBuilder* builder = state == InSeq ? &outlinedBuilder |
| : state == NotInSeq ? &existingBuilder |
| : nullptr; |
| if (builder) { |
| if (auto* expr = curr->dynCast<Break>()) { |
| Type type = expr->value ? expr->value->type : Type::none; |
| ASSERT_OK(builder->visitBreakWithType(expr, type)); |
| } else if (auto* expr = curr->dynCast<Switch>()) { |
| Type type = expr->value ? expr->value->type : Type::none; |
| ASSERT_OK(builder->visitSwitchWithType(expr, type)); |
| } else { |
| // Assert ensures new unhandled branch instructions |
| // will quickly cause an error. Serves as a reminder to |
| // implement a new special-case visit*WithType. |
| assert(curr->is<BrOn>() || !Properties::isBranch(curr)); |
| ASSERT_OK(builder->visit(curr)); |
| } |
| } |
| ODBG(printVisitExpression(curr)); |
| |
| if (state == InSeq || state == InSkipSeq) { |
| maybeEndSeq(); |
| } |
| } |
| |
| // Helpers |
| void startExistingFunction(Function* func) { |
| ASSERT_OK(existingBuilder.build()); |
| ASSERT_OK(existingBuilder.visitFunctionStart(func)); |
| instrCounter = 0; |
| seqCounter = 0; |
| state = NotInSeq; |
| ODBG(std::cerr << "\n" |
| << "Func Start to $" << func->name << " at " |
| << &existingBuilder << "\n"); |
| } |
| |
| ReconstructState getCurrState() { |
| // We are either in a sequence or not in a sequence. If we are in a sequence |
| // and have already created the body of the outlined function that will be |
| // called, then we will skip instructions, otherwise we add the instructions |
| // to the outlined function. If we are not in a sequence, then the |
| // instructions are sent to the existing function. |
| if (seqCounter < sequences.size() && |
| instrCounter >= sequences[seqCounter].startIdx && |
| instrCounter < sequences[seqCounter].endIdx) { |
| return getModule()->getFunction(sequences[seqCounter].func)->body |
| ? InSkipSeq |
| : InSeq; |
| } |
| return NotInSeq; |
| } |
| |
| void maybeBeginSeq() { |
| instrCounter++; |
| auto currState = getCurrState(); |
| if (currState != state) { |
| switch (currState) { |
| case NotInSeq: |
| break; |
| case InSeq: |
| transitionToInSeq(); |
| break; |
| case InSkipSeq: |
| transitionToInSkipSeq(); |
| break; |
| } |
| } |
| state = currState; |
| } |
| |
| void transitionToInSeq() { |
| Function* outlinedFunc = |
| getModule()->getFunction(sequences[seqCounter].func); |
| ASSERT_OK(outlinedBuilder.visitFunctionStart(outlinedFunc)); |
| |
| // Make a call from the existing function to the outlined function. This |
| // call will replace the instructions moved to the outlined function. |
| ODBG(std::cerr << "\nadding call " << outlinedFunc->name << " to " |
| << &existingBuilder << "\n"); |
| ASSERT_OK(existingBuilder.makeCall(outlinedFunc->name, false)); |
| |
| // If the last instruction of the outlined sequence is unreachable, insert |
| // an unreachable instruction immediately after the call to the outlined |
| // function. This maintains the unreachable type in the original scope |
| // of the outlined sequence. |
| if (sequences[seqCounter].endsTypeUnreachable) { |
| ODBG(std::cerr << "\nadding endsUnreachable to " << &existingBuilder |
| << "\n"); |
| ASSERT_OK(existingBuilder.makeUnreachable()); |
| } |
| |
| // Add a local.get instruction for every parameter of the outlined function. |
| Signature sig = outlinedFunc->getSig(); |
| ODBG(std::cerr << outlinedFunc->name << " takes " << sig.params.size() |
| << " parameters\n"); |
| for (Index i = 0; i < sig.params.size(); i++) { |
| ODBG(std::cerr << "adding local.get $" << i << " to " << &outlinedBuilder |
| << "\n"); |
| ASSERT_OK(outlinedBuilder.makeLocalGet(i)); |
| } |
| } |
| |
| void transitionToInSkipSeq() { |
| Function* outlinedFunc = |
| getModule()->getFunction(sequences[seqCounter].func); |
| ODBG(std::cerr << "\nstarting to skip instructions " |
| << sequences[seqCounter].startIdx << " - " |
| << sequences[seqCounter].endIdx - 1 << " to " |
| << sequences[seqCounter].func |
| << " and adding call() instead\n"); |
| ASSERT_OK(existingBuilder.makeCall(outlinedFunc->name, false)); |
| // If the last instruction of the outlined sequence is unreachable, insert |
| // an unreachable instruction immediately after the call to the outlined |
| // function. This maintains the unreachable type in the original scope |
| // of the outlined sequence. |
| if (sequences[seqCounter].endsTypeUnreachable) { |
| ASSERT_OK(existingBuilder.makeUnreachable()); |
| } |
| } |
| |
| void maybeEndSeq() { |
| if (instrCounter + 1 == sequences[seqCounter].endIdx) { |
| transitionToNotInSeq(); |
| state = NotInSeq; |
| } |
| } |
| |
| void transitionToNotInSeq() { |
| ODBG(std::cerr << "End of sequence "); |
| if (state == InSeq) { |
| ODBG(std::cerr << "to " << &outlinedBuilder); |
| ASSERT_OK(outlinedBuilder.visitEnd()); |
| } |
| ODBG(std::cerr << "\n\n"); |
| // Completed a sequence so increase the seqCounter and reset the state. |
| seqCounter++; |
| } |
| |
| #if OUTLINING_DEBUG |
| void printAddUniqueSymbol(std::string desc) { |
| std::cerr << desc << std::to_string(instrCounter) << " to " |
| << &existingBuilder << "\n"; |
| } |
| |
| void printVisitExpression(Expression* curr) { |
| auto* builder = state == InSeq ? &outlinedBuilder |
| : state == NotInSeq ? &existingBuilder |
| : nullptr; |
| auto verb = state == InSkipSeq ? "skipping " : "adding "; |
| std::cerr << verb << std::to_string(instrCounter) << ": " |
| << ShallowExpression{curr} << "(" << curr << ") to " << builder |
| << "\n"; |
| } |
| #endif |
| }; |
| |
| struct Outlining : public Pass { |
| void run(Module* module) { |
| HashStringifyWalker stringify; |
| // Walk the module and create a "string representation" of the program. |
| stringify.walkModule(module); |
| ODBG(printHashString(stringify.hashString, stringify.exprs)); |
| // Collect all of the substrings of the string representation that appear |
| // more than once in the program. |
| auto substrings = |
| StringifyProcessor::repeatSubstrings(stringify.hashString); |
| // Remove substrings that are substrings of longer repeat substrings. |
| substrings = StringifyProcessor::dedupe(substrings); |
| // Remove substrings with overlapping indices. |
| substrings = StringifyProcessor::filterOverlaps(substrings); |
| // Remove substrings with branch, return, and try_table instructions until |
| // an analysis is performed to see if the intended destination of the branch |
| // is included in the substring to be outlined. |
| substrings = |
| StringifyProcessor::filterBranches(substrings, stringify.exprs); |
| // Remove substrings with local.set instructions until Outlining is extended |
| // to support arranging for the written values to be returned from the |
| // outlined function and written back to the original locals. |
| substrings = |
| StringifyProcessor::filterLocalSets(substrings, stringify.exprs); |
| // Remove substrings with local.get instructions until Outlining is extended |
| // to support passing the local values as additional arguments to the |
| // outlined function. |
| substrings = |
| StringifyProcessor::filterLocalGets(substrings, stringify.exprs); |
| // Convert substrings to sequences that are more easily outlineable as we |
| // walk the functions in a module. Sequences contain indices that |
| // are relative to the enclosing function while substrings have indices |
| // relative to the entire program. |
| auto sequences = makeSequences(module, substrings, stringify); |
| outline(module, |
| sequences |
| #if OUTLINING_DEBUG |
| , |
| stringify |
| #endif |
| ); |
| // Position the outlined functions first in the functions vector to make |
| // the outlining lit tests far more readable. |
| moveOutlinedFunctions(module, substrings.size()); |
| |
| // Because we visit control flow in stringified order rather than normal |
| // postorder, IRBuilder is not able to properly track branches, so it may |
| // not have finalized blocks with the correct types. ReFinalize now to fix |
| // any issues. |
| PassRunner runner(getPassRunner()); |
| runner.add(std::make_unique<ReFinalize>()); |
| runner.run(); |
| } |
| |
| Name addOutlinedFunction(Module* module, |
| const SuffixTree::RepeatedSubstring& substring, |
| const std::vector<Expression*>& exprs) { |
| auto startIdx = substring.StartIndices[0]; |
| // The outlined functions can be named anything. |
| Name func = Names::getValidFunctionName(*module, std::string("outline$")); |
| // Calculate the function signature for the outlined sequence. |
| StackSignature sig; |
| for (uint32_t exprIdx = startIdx; exprIdx < startIdx + substring.Length; |
| exprIdx++) { |
| sig += StackSignature(exprs[exprIdx]); |
| } |
| module->addFunction(Builder::makeFunction( |
| func, Type(Signature(sig.params, sig.results), NonNullable, Exact), {})); |
| return func; |
| } |
| |
| using Sequences = |
| std::unordered_map<Name, std::vector<wasm::OutliningSequence>>; |
| |
| // Converts an array of SuffixTree::RepeatedSubstring to a mapping of original |
| // functions to repeated sequences they contain. These sequences are ordered |
| // by start index by construction because the substring's start indices are |
| // ordered. |
| Sequences makeSequences(Module* module, |
| const Substrings& substrings, |
| const HashStringifyWalker& stringify) { |
| Sequences seqByFunc; |
| for (auto& substring : substrings) { |
| auto func = addOutlinedFunction(module, substring, stringify.exprs); |
| for (auto seqIdx : substring.StartIndices) { |
| // seqIdx is relative to the entire program; making the idx of the |
| // sequence relative to its function is better for outlining because we |
| // walk functions. |
| auto [relativeIdx, existingFunc] = stringify.makeRelative(seqIdx); |
| auto seq = OutliningSequence( |
| relativeIdx, |
| relativeIdx + substring.Length, |
| func, |
| stringify.exprs[seqIdx + substring.Length - 1]->type == |
| Type::unreachable |
| #if OUTLINING_DEBUG |
| , |
| seqIdx |
| #endif |
| ); |
| seqByFunc[existingFunc].push_back(seq); |
| } |
| } |
| return seqByFunc; |
| } |
| |
| void outline(Module* module, |
| Sequences seqByFunc |
| #if OUTLINING_DEBUG |
| , |
| const HashStringifyWalker& stringify |
| #endif |
| ) { |
| // TODO: Make this a function-parallel sub-pass. |
| std::vector<Name> keys(seqByFunc.size()); |
| std::transform(seqByFunc.begin(), |
| seqByFunc.end(), |
| keys.begin(), |
| [](auto pair) { return pair.first; }); |
| for (auto func : keys) { |
| // During function reconstruction, a walker iterates thru each instruction |
| // of a function, incrementing a counter to find matching sequences. As a |
| // result, the sequences of a function must be sorted by |
| // smallest start index, otherwise reconstruction will miss outlining a |
| // repeat sequence. |
| std::sort(seqByFunc[func].begin(), |
| seqByFunc[func].end(), |
| [](OutliningSequence a, OutliningSequence b) { |
| return a.startIdx < b.startIdx; |
| }); |
| ReconstructStringifyWalker reconstruct(module, module->getFunction(func)); |
| reconstruct.sequences = std::move(seqByFunc[func]); |
| ODBG(printReconstruct(module, |
| stringify.hashString, |
| stringify.exprs, |
| func, |
| reconstruct.sequences)); |
| reconstruct.doWalkFunction(module->getFunction(func)); |
| } |
| } |
| |
| void moveOutlinedFunctions(Module* module, uint32_t outlinedCount) { |
| // Rearrange outlined functions to the beginning of the functions vector by |
| // using std::make_move_iterator to avoid making copies. A temp vector is |
| // created to avoid iterator invalidation. |
| auto count = module->functions.size(); |
| std::vector<std::unique_ptr<Function>> temp( |
| std::make_move_iterator(module->functions.end() - outlinedCount), |
| std::make_move_iterator(module->functions.end())); |
| module->functions.insert(module->functions.begin(), |
| std::make_move_iterator(temp.begin()), |
| std::make_move_iterator(temp.end())); |
| module->functions.resize(count); |
| // After the functions vector is directly manipulated, we need to call |
| // updateFunctionsMap(). |
| module->updateFunctionsMap(); |
| } |
| |
| #if OUTLINING_DEBUG |
| void printHashString(const std::vector<uint32_t>& hashString, |
| const std::vector<Expression*>& exprs) { |
| std::cerr << "\n\n"; |
| for (Index idx = 0; idx < hashString.size(); idx++) { |
| Expression* expr = exprs[idx]; |
| if (expr) { |
| std::cerr << idx << " - " << hashString[idx] << ": " |
| << ShallowExpression{expr} << "\n"; |
| } else { |
| std::cerr << idx << ": unique symbol\n"; |
| } |
| } |
| } |
| void printReconstruct(Module* module, |
| const std::vector<uint32_t>& hashString, |
| const std::vector<Expression*>& exprs, |
| Name existingFunc, |
| const std::vector<OutliningSequence>& seqs) { |
| std::cerr << "\n\nReconstructing existing fn: " << existingFunc << "\n"; |
| std::cerr << "moving sequences: " |
| << "\n"; |
| for (auto& seq : seqs) { |
| for (Index idx = seq.programIdx; |
| idx < seq.programIdx + (seq.endIdx - seq.startIdx); |
| idx++) { |
| Expression* expr = exprs[idx]; |
| if (expr == nullptr) { |
| std::cerr << "unique symbol\n"; |
| } else { |
| std::cerr << idx << " - " << hashString[idx] << " - " << seq.startIdx |
| << " : " << ShallowExpression{expr} << "\n"; |
| } |
| } |
| std::cerr << "to outlined function: " << seq.func << "\n"; |
| auto outlinedFunction = module->getFunction(seq.func); |
| std::cerr << "with signature: " << outlinedFunction->type.toString() |
| << "\n"; |
| } |
| } |
| #endif |
| }; |
| |
| Pass* createOutliningPass() { return new Outlining(); } |
| |
| } // namespace wasm |