blob: 9bf2f1902520c263e814712c175bc4f1b3234781 [file] [log] [blame] [edit]
/*
* Copyright 2023 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ir/names.h"
#include "ir/stack-utils.h"
#include "ir/utils.h"
#include "pass.h"
#include "passes/stringify-walker.h"
#include "support/intervals.h"
#include "support/suffix_tree.h"
#include "wasm-ir-builder.h"
#include "wasm.h"
#define OUTLINING_DEBUG 0
#if OUTLINING_DEBUG
#define ODBG(statement) statement
#else
#define ODBG(statement)
#endif
// Check a Result or MaybeResult for error and call Fatal() if the error exists.
#define ASSERT_OK(val) \
if (auto _val = (val); auto err = _val.getErr()) { \
Fatal() << err->msg; \
}
namespace wasm {
// This custom hasher conforms to std::hash<Key>. Its purpose is to provide
// a custom hash for if expressions, so the if-condition of the if expression is
// not included in the hash for the if expression. This is needed because in the
// binary format, the if-condition comes before and is consumed by the if. To
// match the binary format, we hash the if condition before and separately from
// the rest of the if expression.
struct StringifyHasher {
size_t operator()(Expression* curr) const {
if (Properties::isControlFlowStructure(curr)) {
if (auto* iff = curr->dynCast<If>()) {
size_t digest = wasm::hash(iff->_id);
rehash(digest, ExpressionAnalyzer::hash(iff->ifTrue));
if (iff->ifFalse) {
rehash(digest, ExpressionAnalyzer::hash(iff->ifFalse));
}
return digest;
}
return ExpressionAnalyzer::hash(curr);
}
return ExpressionAnalyzer::shallowHash(curr);
}
};
// This custom equator conforms to std::equal_to<Key>. Similar to
// StringifyHasher, it's purpose is to not include the if-condition when
// evaluating the equality of two if expressions.
struct StringifyEquator {
bool operator()(Expression* lhs, Expression* rhs) const {
if (Properties::isControlFlowStructure(lhs) &&
Properties::isControlFlowStructure(rhs)) {
auto* iffl = lhs->dynCast<If>();
auto* iffr = rhs->dynCast<If>();
if (iffl && iffr) {
return ExpressionAnalyzer::equal(iffl->ifTrue, iffr->ifTrue) &&
ExpressionAnalyzer::equal(iffl->ifFalse, iffr->ifFalse);
}
return ExpressionAnalyzer::equal(lhs, rhs);
}
return ExpressionAnalyzer::shallowEqual(lhs, rhs);
}
};
struct HashStringifyWalker : public StringifyWalker<HashStringifyWalker> {
// After calling walkModule, this vector contains the result of encoding a
// wasm module as a string of uint32_t values. Each value represents either an
// Expression or a separator to mark the end of control flow.
std::vector<uint32_t> hashString;
// A monotonic counter used to ensure that unique expressions in the
// module are assigned a unique value in the hashString.
uint32_t nextVal = 0;
// A monotonic counter used to ensure that each separator in the
// module is assigned a unique value in the hashString.
int32_t nextSeparatorVal = -1;
// Contains a mapping of expression pointer to value to ensure we
// use the same value for matching expressions. A custom hasher and
// equator is provided in order to separate out evaluation of the if-condition
// when evaluating if expressions.
std::unordered_map<Expression*, uint32_t, StringifyHasher, StringifyEquator>
exprToCounter;
std::vector<Expression*> exprs;
void addUniqueSymbol(SeparatorReason reason);
void visitExpression(Expression* curr);
// Converts the idx from relative to the beginning of the program to
// relative to its enclosing function, and returns the name of its function.
std::pair<uint32_t, Name> makeRelative(uint32_t idx) const;
private:
// Contains the indices that mark the start of each function.
std::set<uint32_t> funcIndices;
// Maps the start idx of each function to the function name.
std::map<uint32_t, Name> idxToFuncName;
};
void HashStringifyWalker::addUniqueSymbol(SeparatorReason reason) {
// Use a negative value to distinguish symbols for separators from symbols
// for Expressions
assert((uint32_t)nextSeparatorVal >= nextVal);
if (auto funcStart = reason.getFuncStart()) {
idxToFuncName.insert({hashString.size(), funcStart->func->name});
}
hashString.push_back((uint32_t)nextSeparatorVal);
nextSeparatorVal--;
exprs.push_back(nullptr);
}
void HashStringifyWalker::visitExpression(Expression* curr) {
auto [it, inserted] = exprToCounter.insert({curr, nextVal});
hashString.push_back(it->second);
exprs.push_back(curr);
if (inserted) {
nextVal++;
}
}
std::pair<uint32_t, Name>
HashStringifyWalker::makeRelative(uint32_t idx) const {
// The upper_bound function returns an iterator to the first value in the set
// that is true for idx < value. We subtract one from this returned value to
// tell us which function actually contains the the idx.
auto [funcIdx, func] = *--idxToFuncName.upper_bound(idx);
return {idx - funcIdx, func};
}
using Substrings = std::vector<SuffixTree::RepeatedSubstring>;
// Functions that filter vectors of SuffixTree::RepeatedSubstring
struct StringifyProcessor {
static Substrings repeatSubstrings(std::vector<uint32_t>& hashString);
static Substrings dedupe(const Substrings& substrings);
static Substrings filterOverlaps(const Substrings& substrings);
// Filter is the general purpose function backing subsequent filter functions.
// It can be used directly, but generally prefer a wrapper function
// to encapsulate your condition and make it available for tests.
static Substrings filter(const Substrings& substrings,
const std::vector<Expression*>& exprs,
std::function<bool(const Expression*)> condition);
static Substrings filterLocalSets(const Substrings& substrings,
const std::vector<Expression*>& exprs);
static Substrings filterLocalGets(const Substrings& substrings,
const std::vector<Expression*>& exprs);
static Substrings filterBranches(const Substrings& substrings,
const std::vector<Expression*>& exprs);
};
std::vector<SuffixTree::RepeatedSubstring>
StringifyProcessor::repeatSubstrings(std::vector<uint32_t>& hashString) {
SuffixTree st(hashString);
std::vector<SuffixTree::RepeatedSubstring> substrings(st.begin(), st.end());
for (auto& substring : substrings) {
// Sort by increasing start index to ensure determinism.
std::sort(substring.StartIndices.begin(), substring.StartIndices.end());
}
// Substrings are sorted so that the longest substring that repeats the most
// times is ordered first. This is done so that we can assume the most
// worthwhile substrings to outline come first.
std::sort(
substrings.begin(),
substrings.end(),
[](SuffixTree::RepeatedSubstring a, SuffixTree::RepeatedSubstring b) {
size_t aWeight = a.Length * a.StartIndices.size();
size_t bWeight = b.Length * b.StartIndices.size();
if (aWeight == bWeight) {
return a.StartIndices[0] < b.StartIndices[0];
}
return aWeight > bWeight;
});
return substrings;
}
// Deduplicate substrings by iterating through the list of substrings, keeping
// only those whose list of end indices is disjoint from the set of end indices
// for all substrings kept so far. Substrings that are contained within other
// substrings will always share an end index with those other substrings. Note
// that this deduplication may be over-aggressive, since it will remove
// substrings that are contained within any previous substring, even if they
// have many other occurrences that are not inside other substrings. Part of the
// reason dedupe can be so aggressive is an assumption 1) that the input
// substrings have been sorted so that the longest substrings with the most
// repeats come first and 2) these are more worthwhile to keep than subsequent
// substrings of substrings, even if they appear more times.
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::dedupe(
const std::vector<SuffixTree::RepeatedSubstring>& substrings) {
std::unordered_set<uint32_t> seen;
std::vector<SuffixTree::RepeatedSubstring> result;
for (auto substring : substrings) {
std::vector<uint32_t> idxToInsert;
bool seenEndIdx = false;
for (auto startIdx : substring.StartIndices) {
// We are using the end index to ensure that each repeated substring
// reported by the SuffixTree is unique. This is because LLVM's SuffixTree
// reports back repeat sequences that are substrings of longer repeat
// sequences with the same endIdx, and we generally prefer to outline
// longer repeat sequences.
uint32_t endIdx = substring.Length + startIdx;
if (seen.count(endIdx)) {
seenEndIdx = true;
break;
}
idxToInsert.push_back(endIdx);
}
if (!seenEndIdx) {
seen.insert(idxToInsert.begin(), idxToInsert.end());
result.push_back(substring);
}
}
return result;
}
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterOverlaps(
const std::vector<SuffixTree::RepeatedSubstring>& substrings) {
// A substring represents a contiguous set of instructions that appear more
// than once in a Wasm binary. For each appearance of the substring, an
// Interval is created that lacks a connection back to its originating
// substring. To fix, upon Interval creation, a second vector is populated
// with the index of the corresponding substring.
std::vector<Interval> intervals;
std::vector<int> substringIdxs;
// Construct intervals
for (Index i = 0; i < substrings.size(); i++) {
auto& substring = substrings[i];
for (auto startIdx : substring.StartIndices) {
intervals.emplace_back(
startIdx, startIdx + substring.Length, substring.Length);
substringIdxs.push_back(i);
}
}
// Get the overlapping intervals
std::vector<SuffixTree::RepeatedSubstring> result;
std::vector<std::vector<Index>> startIndices(substrings.size());
std::vector<int> indices = IntervalProcessor::filterOverlaps(intervals);
for (auto i : indices) {
// i is the idx of the Interval in the intervals vector
// i in substringIdxs returns the idx of the substring that needs to be
// included in result
auto substringIdx = substringIdxs[i];
startIndices[substringIdx].push_back(intervals[i].start);
}
for (Index i = 0; i < startIndices.size(); i++) {
if (startIndices[i].size() > 1) {
result.emplace_back(SuffixTree::RepeatedSubstring(
{substrings[i].Length, std::move(startIndices[i])}));
}
}
return result;
}
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filter(
const std::vector<SuffixTree::RepeatedSubstring>& substrings,
const std::vector<Expression*>& exprs,
std::function<bool(const Expression*)> condition) {
struct FilterStringifyWalker : public StringifyWalker<FilterStringifyWalker> {
bool hasFilterValue = false;
std::function<bool(const Expression*)> condition;
FilterStringifyWalker(std::function<bool(const Expression*)> condition)
: condition(condition){};
void walk(Expression* curr) {
hasFilterValue = false;
Super::walk(curr);
flushControlFlowQueue();
}
void addUniqueSymbol(SeparatorReason reason) {}
void visitExpression(Expression* curr) {
if (condition(curr)) {
hasFilterValue = true;
}
}
};
FilterStringifyWalker walker(condition);
std::vector<SuffixTree::RepeatedSubstring> result;
for (auto substring : substrings) {
bool hasFilterValue = false;
for (auto idx = substring.StartIndices[0],
endIdx = substring.StartIndices[0] + substring.Length;
idx < endIdx;
idx++) {
Expression* curr = exprs[idx];
if (Properties::isControlFlowStructure(curr)) {
walker.walk(curr);
if (walker.hasFilterValue) {
hasFilterValue = true;
break;
}
}
if (condition(curr)) {
hasFilterValue = true;
break;
}
}
if (!hasFilterValue) {
result.push_back(substring);
}
}
return result;
}
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterLocalSets(
const std::vector<SuffixTree::RepeatedSubstring>& substrings,
const std::vector<Expression*>& exprs) {
return StringifyProcessor::filter(
substrings, exprs, [](const Expression* curr) {
return curr->is<LocalSet>();
});
}
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterLocalGets(
const std::vector<SuffixTree::RepeatedSubstring>& substrings,
const std::vector<Expression*>& exprs) {
return StringifyProcessor::filter(
substrings, exprs, [](const Expression* curr) {
return curr->is<LocalGet>();
});
}
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterBranches(
const std::vector<SuffixTree::RepeatedSubstring>& substrings,
const std::vector<Expression*>& exprs) {
return StringifyProcessor::filter(
substrings, exprs, [](const Expression* curr) {
return Properties::isBranch(curr) || curr->is<Return>() ||
curr->is<TryTable>();
});
}
struct OutliningSequence {
unsigned startIdx;
unsigned endIdx;
Name func;
bool endsTypeUnreachable;
#if OUTLINING_DEBUG
unsigned programIdx;
#endif
OutliningSequence(unsigned startIdx,
unsigned endIdx,
Name func,
bool endsTypeUnreachable
#if OUTLINING_DEBUG
,
unsigned programIdx
#endif
)
: startIdx(startIdx), endIdx(endIdx), func(func),
endsTypeUnreachable(endsTypeUnreachable)
#if OUTLINING_DEBUG
,
programIdx(programIdx)
#endif
{
}
};
// Instances of this walker are intended to walk a function at a time, at the
// behest of the owner of the instance.
struct ReconstructStringifyWalker
: public StringifyWalker<ReconstructStringifyWalker> {
ReconstructStringifyWalker(Module* wasm, Function* func)
: existingBuilder(*wasm), outlinedBuilder(*wasm), func(func) {
this->setModule(wasm);
ODBG(std::cerr << "\nexistingBuilder: " << &existingBuilder
<< " outlinedBuilder: " << &outlinedBuilder << "\n");
}
// As we reconstruct the IR during outlining, we need to know what
// state we're in to determine which IRBuilder to send the instruction to.
enum ReconstructState {
NotInSeq = 0, // Will not be outlined into a new function.
InSeq = 1, // Currently being outlined into a new function.
InSkipSeq = 2, // A sequence that has already been outlined.
};
// We begin with the assumption that we are not currently in a sequence that
// will be outlined.
ReconstructState state = ReconstructState::NotInSeq;
// The list of sequences that will be outlined, contained in the function
// currently being walked.
std::vector<OutliningSequence> sequences;
// Tracks the OutliningSequence the walker is about to outline or is currently
// outlining.
uint32_t seqCounter = 0;
// Counts the number of instructions visited since the function began,
// corresponds to the indices in the sequences.
uint32_t instrCounter = 0;
// A reusable builder for reconstructing the function that will have sequences
// of instructions removed to be placed into an outlined function. The removed
// sequences will be replaced by a call to the outlined function.
IRBuilder existingBuilder;
// A reusable builder for constructing the outlined functions that will
// contain repeat sequences found in the program.
IRBuilder outlinedBuilder;
// The function we are outlining from.
Function* func;
void addUniqueSymbol(SeparatorReason reason) {
if (auto curr = reason.getFuncStart()) {
startExistingFunction(curr->func);
return;
}
// instrCounter is managed manually and incremented at the beginning of
// addUniqueSymbol() and visitExpression(), except for the case where we are
// starting a new function, as that resets the counters back to 0.
instrCounter++;
ODBG(std::string desc);
if (auto curr = reason.getBlockStart()) {
ODBG(desc = "Block Start at ");
ASSERT_OK(existingBuilder.visitBlockStart(curr->block));
} else if (auto curr = reason.getIfStart()) {
// IR builder needs the condition of the If pushed onto the builder before
// visitIfStart(), which will expect to be able to pop the condition.
// This is always okay to do because the correct condition was installed
// onto the If when the outer scope was visited.
existingBuilder.pushSynthetic(curr->iff->condition);
ODBG(desc = "If Start at ");
ASSERT_OK(existingBuilder.visitIfStart(curr->iff));
} else if (reason.getElseStart()) {
ODBG(desc = "Else Start at ");
ASSERT_OK(existingBuilder.visitElse());
} else if (auto curr = reason.getLoopStart()) {
ODBG(desc = "Loop Start at ");
ASSERT_OK(existingBuilder.visitLoopStart(curr->loop));
} else if (auto curr = reason.getTryStart()) {
// We preserve the name of the tryy because IRBuilder expects
// visitTryStart() to be called on an empty Try, during the normal case of
// parsing. TODO: Fix this.
auto name = curr->tryy->name;
ASSERT_OK(existingBuilder.visitTryStart(curr->tryy, Name()));
ODBG(desc = "Try Start at ");
curr->tryy->name = name;
} else if (auto curr = reason.getCatchStart()) {
ASSERT_OK(existingBuilder.visitCatch(curr->tag));
ODBG(desc = "Catch Start at ");
} else if (reason.getCatchAllStart()) {
ASSERT_OK(existingBuilder.visitCatchAll());
ODBG(desc = "Catch All Start at");
} else if (auto curr = reason.getTryTableStart()) {
ODBG(desc = "Try Table Start at ");
ASSERT_OK(existingBuilder.visitTryTableStart(curr->tryt));
} else if (reason.getEnd()) {
ODBG(desc = "End at ");
ASSERT_OK(existingBuilder.visitEnd());
// Reset the function in case we just ended the function scope.
existingBuilder.setFunction(func);
// Outlining performs an unnested walk of the Wasm module, visiting
// each scope one at a time. IRBuilder, in contrast, expects to
// visit several nested scopes at a time. Thus, calling end() finalizes
// the control flow and places it on IRBuilder's internal stack, ready for
// the enclosing scope to consume its expressions off the stack. Since
// outlining walks unnested, the enclosing scope never arrives to retrieve
// its expressions off the stack, so we must call build() after visitEnd()
// to clear the internal stack IRBuilder manages.
ASSERT_OK(existingBuilder.build());
} else {
ODBG(desc = "addUniqueSymbol for unimplemented control flow ");
WASM_UNREACHABLE("unimplemented control flow");
}
ODBG(printAddUniqueSymbol(desc));
}
void visitExpression(Expression* curr) {
maybeBeginSeq();
IRBuilder* builder = state == InSeq ? &outlinedBuilder
: state == NotInSeq ? &existingBuilder
: nullptr;
if (builder) {
if (auto* expr = curr->dynCast<Break>()) {
Type type = expr->value ? expr->value->type : Type::none;
ASSERT_OK(builder->visitBreakWithType(expr, type));
} else if (auto* expr = curr->dynCast<Switch>()) {
Type type = expr->value ? expr->value->type : Type::none;
ASSERT_OK(builder->visitSwitchWithType(expr, type));
} else {
// Assert ensures new unhandled branch instructions
// will quickly cause an error. Serves as a reminder to
// implement a new special-case visit*WithType.
assert(curr->is<BrOn>() || !Properties::isBranch(curr));
ASSERT_OK(builder->visit(curr));
}
}
ODBG(printVisitExpression(curr));
if (state == InSeq || state == InSkipSeq) {
maybeEndSeq();
}
}
// Helpers
void startExistingFunction(Function* func) {
ASSERT_OK(existingBuilder.build());
ASSERT_OK(existingBuilder.visitFunctionStart(func));
instrCounter = 0;
seqCounter = 0;
state = NotInSeq;
ODBG(std::cerr << "\n"
<< "Func Start to $" << func->name << " at "
<< &existingBuilder << "\n");
}
ReconstructState getCurrState() {
// We are either in a sequence or not in a sequence. If we are in a sequence
// and have already created the body of the outlined function that will be
// called, then we will skip instructions, otherwise we add the instructions
// to the outlined function. If we are not in a sequence, then the
// instructions are sent to the existing function.
if (seqCounter < sequences.size() &&
instrCounter >= sequences[seqCounter].startIdx &&
instrCounter < sequences[seqCounter].endIdx) {
return getModule()->getFunction(sequences[seqCounter].func)->body
? InSkipSeq
: InSeq;
}
return NotInSeq;
}
void maybeBeginSeq() {
instrCounter++;
auto currState = getCurrState();
if (currState != state) {
switch (currState) {
case NotInSeq:
break;
case InSeq:
transitionToInSeq();
break;
case InSkipSeq:
transitionToInSkipSeq();
break;
}
}
state = currState;
}
void transitionToInSeq() {
Function* outlinedFunc =
getModule()->getFunction(sequences[seqCounter].func);
ASSERT_OK(outlinedBuilder.visitFunctionStart(outlinedFunc));
// Make a call from the existing function to the outlined function. This
// call will replace the instructions moved to the outlined function.
ODBG(std::cerr << "\nadding call " << outlinedFunc->name << " to "
<< &existingBuilder << "\n");
ASSERT_OK(existingBuilder.makeCall(outlinedFunc->name, false));
// If the last instruction of the outlined sequence is unreachable, insert
// an unreachable instruction immediately after the call to the outlined
// function. This maintains the unreachable type in the original scope
// of the outlined sequence.
if (sequences[seqCounter].endsTypeUnreachable) {
ODBG(std::cerr << "\nadding endsUnreachable to " << &existingBuilder
<< "\n");
ASSERT_OK(existingBuilder.makeUnreachable());
}
// Add a local.get instruction for every parameter of the outlined function.
Signature sig = outlinedFunc->getSig();
ODBG(std::cerr << outlinedFunc->name << " takes " << sig.params.size()
<< " parameters\n");
for (Index i = 0; i < sig.params.size(); i++) {
ODBG(std::cerr << "adding local.get $" << i << " to " << &outlinedBuilder
<< "\n");
ASSERT_OK(outlinedBuilder.makeLocalGet(i));
}
}
void transitionToInSkipSeq() {
Function* outlinedFunc =
getModule()->getFunction(sequences[seqCounter].func);
ODBG(std::cerr << "\nstarting to skip instructions "
<< sequences[seqCounter].startIdx << " - "
<< sequences[seqCounter].endIdx - 1 << " to "
<< sequences[seqCounter].func
<< " and adding call() instead\n");
ASSERT_OK(existingBuilder.makeCall(outlinedFunc->name, false));
// If the last instruction of the outlined sequence is unreachable, insert
// an unreachable instruction immediately after the call to the outlined
// function. This maintains the unreachable type in the original scope
// of the outlined sequence.
if (sequences[seqCounter].endsTypeUnreachable) {
ASSERT_OK(existingBuilder.makeUnreachable());
}
}
void maybeEndSeq() {
if (instrCounter + 1 == sequences[seqCounter].endIdx) {
transitionToNotInSeq();
state = NotInSeq;
}
}
void transitionToNotInSeq() {
ODBG(std::cerr << "End of sequence ");
if (state == InSeq) {
ODBG(std::cerr << "to " << &outlinedBuilder);
ASSERT_OK(outlinedBuilder.visitEnd());
}
ODBG(std::cerr << "\n\n");
// Completed a sequence so increase the seqCounter and reset the state.
seqCounter++;
}
#if OUTLINING_DEBUG
void printAddUniqueSymbol(std::string desc) {
std::cerr << desc << std::to_string(instrCounter) << " to "
<< &existingBuilder << "\n";
}
void printVisitExpression(Expression* curr) {
auto* builder = state == InSeq ? &outlinedBuilder
: state == NotInSeq ? &existingBuilder
: nullptr;
auto verb = state == InSkipSeq ? "skipping " : "adding ";
std::cerr << verb << std::to_string(instrCounter) << ": "
<< ShallowExpression{curr} << "(" << curr << ") to " << builder
<< "\n";
}
#endif
};
struct Outlining : public Pass {
void run(Module* module) {
HashStringifyWalker stringify;
// Walk the module and create a "string representation" of the program.
stringify.walkModule(module);
ODBG(printHashString(stringify.hashString, stringify.exprs));
// Collect all of the substrings of the string representation that appear
// more than once in the program.
auto substrings =
StringifyProcessor::repeatSubstrings(stringify.hashString);
// Remove substrings that are substrings of longer repeat substrings.
substrings = StringifyProcessor::dedupe(substrings);
// Remove substrings with overlapping indices.
substrings = StringifyProcessor::filterOverlaps(substrings);
// Remove substrings with branch, return, and try_table instructions until
// an analysis is performed to see if the intended destination of the branch
// is included in the substring to be outlined.
substrings =
StringifyProcessor::filterBranches(substrings, stringify.exprs);
// Remove substrings with local.set instructions until Outlining is extended
// to support arranging for the written values to be returned from the
// outlined function and written back to the original locals.
substrings =
StringifyProcessor::filterLocalSets(substrings, stringify.exprs);
// Remove substrings with local.get instructions until Outlining is extended
// to support passing the local values as additional arguments to the
// outlined function.
substrings =
StringifyProcessor::filterLocalGets(substrings, stringify.exprs);
// Convert substrings to sequences that are more easily outlineable as we
// walk the functions in a module. Sequences contain indices that
// are relative to the enclosing function while substrings have indices
// relative to the entire program.
auto sequences = makeSequences(module, substrings, stringify);
outline(module,
sequences
#if OUTLINING_DEBUG
,
stringify
#endif
);
// Position the outlined functions first in the functions vector to make
// the outlining lit tests far more readable.
moveOutlinedFunctions(module, substrings.size());
// Because we visit control flow in stringified order rather than normal
// postorder, IRBuilder is not able to properly track branches, so it may
// not have finalized blocks with the correct types. ReFinalize now to fix
// any issues.
PassRunner runner(getPassRunner());
runner.add(std::make_unique<ReFinalize>());
runner.run();
}
Name addOutlinedFunction(Module* module,
const SuffixTree::RepeatedSubstring& substring,
const std::vector<Expression*>& exprs) {
auto startIdx = substring.StartIndices[0];
// The outlined functions can be named anything.
Name func = Names::getValidFunctionName(*module, std::string("outline$"));
// Calculate the function signature for the outlined sequence.
StackSignature sig;
for (uint32_t exprIdx = startIdx; exprIdx < startIdx + substring.Length;
exprIdx++) {
sig += StackSignature(exprs[exprIdx]);
}
module->addFunction(Builder::makeFunction(
func, Type(Signature(sig.params, sig.results), NonNullable, Exact), {}));
return func;
}
using Sequences =
std::unordered_map<Name, std::vector<wasm::OutliningSequence>>;
// Converts an array of SuffixTree::RepeatedSubstring to a mapping of original
// functions to repeated sequences they contain. These sequences are ordered
// by start index by construction because the substring's start indices are
// ordered.
Sequences makeSequences(Module* module,
const Substrings& substrings,
const HashStringifyWalker& stringify) {
Sequences seqByFunc;
for (auto& substring : substrings) {
auto func = addOutlinedFunction(module, substring, stringify.exprs);
for (auto seqIdx : substring.StartIndices) {
// seqIdx is relative to the entire program; making the idx of the
// sequence relative to its function is better for outlining because we
// walk functions.
auto [relativeIdx, existingFunc] = stringify.makeRelative(seqIdx);
auto seq = OutliningSequence(
relativeIdx,
relativeIdx + substring.Length,
func,
stringify.exprs[seqIdx + substring.Length - 1]->type ==
Type::unreachable
#if OUTLINING_DEBUG
,
seqIdx
#endif
);
seqByFunc[existingFunc].push_back(seq);
}
}
return seqByFunc;
}
void outline(Module* module,
Sequences seqByFunc
#if OUTLINING_DEBUG
,
const HashStringifyWalker& stringify
#endif
) {
// TODO: Make this a function-parallel sub-pass.
std::vector<Name> keys(seqByFunc.size());
std::transform(seqByFunc.begin(),
seqByFunc.end(),
keys.begin(),
[](auto pair) { return pair.first; });
for (auto func : keys) {
// During function reconstruction, a walker iterates thru each instruction
// of a function, incrementing a counter to find matching sequences. As a
// result, the sequences of a function must be sorted by
// smallest start index, otherwise reconstruction will miss outlining a
// repeat sequence.
std::sort(seqByFunc[func].begin(),
seqByFunc[func].end(),
[](OutliningSequence a, OutliningSequence b) {
return a.startIdx < b.startIdx;
});
ReconstructStringifyWalker reconstruct(module, module->getFunction(func));
reconstruct.sequences = std::move(seqByFunc[func]);
ODBG(printReconstruct(module,
stringify.hashString,
stringify.exprs,
func,
reconstruct.sequences));
reconstruct.doWalkFunction(module->getFunction(func));
}
}
void moveOutlinedFunctions(Module* module, uint32_t outlinedCount) {
// Rearrange outlined functions to the beginning of the functions vector by
// using std::make_move_iterator to avoid making copies. A temp vector is
// created to avoid iterator invalidation.
auto count = module->functions.size();
std::vector<std::unique_ptr<Function>> temp(
std::make_move_iterator(module->functions.end() - outlinedCount),
std::make_move_iterator(module->functions.end()));
module->functions.insert(module->functions.begin(),
std::make_move_iterator(temp.begin()),
std::make_move_iterator(temp.end()));
module->functions.resize(count);
// After the functions vector is directly manipulated, we need to call
// updateFunctionsMap().
module->updateFunctionsMap();
}
#if OUTLINING_DEBUG
void printHashString(const std::vector<uint32_t>& hashString,
const std::vector<Expression*>& exprs) {
std::cerr << "\n\n";
for (Index idx = 0; idx < hashString.size(); idx++) {
Expression* expr = exprs[idx];
if (expr) {
std::cerr << idx << " - " << hashString[idx] << ": "
<< ShallowExpression{expr} << "\n";
} else {
std::cerr << idx << ": unique symbol\n";
}
}
}
void printReconstruct(Module* module,
const std::vector<uint32_t>& hashString,
const std::vector<Expression*>& exprs,
Name existingFunc,
const std::vector<OutliningSequence>& seqs) {
std::cerr << "\n\nReconstructing existing fn: " << existingFunc << "\n";
std::cerr << "moving sequences: "
<< "\n";
for (auto& seq : seqs) {
for (Index idx = seq.programIdx;
idx < seq.programIdx + (seq.endIdx - seq.startIdx);
idx++) {
Expression* expr = exprs[idx];
if (expr == nullptr) {
std::cerr << "unique symbol\n";
} else {
std::cerr << idx << " - " << hashString[idx] << " - " << seq.startIdx
<< " : " << ShallowExpression{expr} << "\n";
}
}
std::cerr << "to outlined function: " << seq.func << "\n";
auto outlinedFunction = module->getFunction(seq.func);
std::cerr << "with signature: " << outlinedFunction->type.toString()
<< "\n";
}
}
#endif
};
Pass* createOutliningPass() { return new Outlining(); }
} // namespace wasm