blob: 994142e482fb9989c6eee9a200d48ff0424cde1a [file] [log] [blame] [edit]
/*
* Copyright 2019 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Asyncify: async/await style code transformation, that allows pausing
// and resuming. This lets a language support synchronous operations in
// an async manner, for example, you can do a blocking wait, and that will
// be turned into code that unwinds the stack at the "blocking" operation,
// then is able to rewind it back up when the actual async operation
// completes, so the code appears to have been running synchronously
// all the while. Use cases for this include coroutines, python generators,
// etc.
//
// The approach here is a third-generation design after Emscripten's original
// Asyncify and then Emterpreter-Async approaches:
//
// * Old Asyncify rewrote control flow in LLVM IR. A problem is that this
// needs to save all SSA registers as part of the local state, which can be
// very costly. A further increase can happen because of phis that are
// added because of control flow transformations. As a result we saw
// pathological cases where the code size increase was unacceptable.
// * Emterpreter-Async avoids any risk of code size increase by running it
// in a little bytecode VM, which also makes pausing/resuming trivial.
// Speed is the main downside here; however, the approach did prove that by
// *selectively* emterpretifying only certain code, many projects can run
// at full speed - often just the "main loop" must be emterpreted, while
//   high-speed code that it calls, in which there cannot be an async
//   operation, remains at full speed.
//
// New Asyncify's design learns from both of those:
//
// * The code rewrite is done in Binaryen, that is, at the wasm level. At
// this level we will only need to save wasm locals, which is a much smaller
// number than LLVM's SSA registers.
// * We aim for low but non-zero overhead. Low overhead is very important
// for obvious reasons, while Emterpreter-Async proved it is tolerable to
// have *some* overhead, if the transform can be applied selectively.
//
// This new Asyncify transformation implemented here is simpler than old
// Asyncify but has low overhead when properly optimized. Old Asyncify worked at
// the CFG level and added branches there; new Asyncify on the other hand works
// on the structured control flow of wasm and simply "skips over" code when
// rewinding the stack, and jumps out when unwinding. The transformed code looks
// conceptually like this:
// (We have three phases: normal, unwinding, and rewinding)
//
// void foo(int x) {
// // new prelude
// if (rewinding) {
// loadLocals();
// }
// // main body starts here
// if (normal) {
// // some code we must skip while rewinding
// x = x + 1;
// x = x / 2;
// }
// // If rewinding, do this call only if it is the right one.
// if (normal or check_call_index(0)) { // 0 is index of this bar() call
// bar(x);
// if (unwinding) {
// noteUnWound(0);
// saveLocals();
// return;
// }
// }
// if (normal) {
// // more code we must skip while rewinding
// while (x & 7) {
// x = x + 1;
// }
// }
// [..]
//
// The general idea is that while rewinding we just "keep going forward",
// skipping code we should not run. That is, instead of jumping directly
// to the right place, we have ifs that skip along instead. The ifs for
// rewinding and unwinding should be well-predicted, so the overhead should
// not be very high. However, we do need to reach the right location via
// such skipping, which means that in a very large function the rewind
// takes some extra time. On the other hand, though, code that cannot
// unwind or rewind (like that loop near the end) can run at full speed.
// Overall, this should allow good performance with small overhead that is
// mostly noticed at rewind time.
//
// After this pass is run a new i32 global "__asyncify_state" is added, which
// has the following values:
//
// 0: normal execution
// 1: unwinding the stack
// 2: rewinding the stack
//
// There is also "__asyncify_data" which when rewinding and unwinding
// contains a pointer to a data structure with the info needed to rewind
// and unwind:
//
// { // offsets
// i32 - current asyncify stack location // 0
// i32 - asyncify stack end // 4
// }
//
// Or for wasm64:
//
// { // offsets
// i64 - current asyncify stack location // 0
// i64 - asyncify stack end // 8
// }
//
// The asyncify stack is a representation of the call frame, as a list of
// indexes of calls. In the example above, we saw index "0" for calling "bar"
// from "foo". When unwinding, the indexes are added to the stack; when
// rewinding, they are popped off; the current asyncify stack location is
// updated while doing both operations. The asyncify stack is also used to
// save locals. Note that the stack end location is provided, which is for
// error detection.
//
// Note: all pointers are assumed to be 4-byte (wasm32) / 8-byte (wasm64)
// aligned.
//
// When you start an unwinding operation, you must set the initial fields
// of the data structure, that is, set the current stack location to the
// proper place, and the end to the proper end based on how much memory
// you reserved. Note that asyncify will grow the stack up.
//
// The pass will also create five functions that let you control unwinding
// and rewinding:
//
// * asyncify_start_unwind(data : iPTR): call this to start unwinding the
// stack from the current location. "data" must point to a data
// structure as described above (with fields containing valid data).
//
// * asyncify_stop_unwind(): call this to note that unwinding has
// concluded. If no other code will run before you start to rewind,
// this is not strictly necessary, however, if you swap between
// coroutines, or even just want to run some normal code during a
// "sleep", then you must call this at the proper time. Otherwise,
// the code will think it is still unwinding when it should not be,
// which means it will keep unwinding in a meaningless way.
//
// * asyncify_start_rewind(data : iPTR): call this to start rewinding the
// stack back up to the location stored in the provided data. This prepares
// for the rewind; to start it, you must call the first function in the
// call stack to be unwound.
//
// * asyncify_stop_rewind(): call this to note that rewinding has
// concluded, and normal execution can resume.
//
// * asyncify_get_state(): call this to get the current value of the
// internal "__asyncify_state" variable as described above.
// It can be used to distinguish between unwinding/rewinding and normal
// calls, so that you know when to start an asynchronous operation and
// when to propagate results back.
//
// These five functions are exported so that you can call them from the
// outside. If you want to manage things from inside the wasm, then you
// couldn't have called them before they were created by this pass. To work
// around that, you can create imports to asyncify.start_unwind,
// asyncify.stop_unwind, asyncify.start_rewind, asyncify.stop_rewind, and
// asyncify.get_state; if those exist when this pass runs then it will turn
// those into direct calls to the functions that it creates. Note that when
// doing everything in wasm like this, Asyncify must not instrument your
// "runtime" support code, that is, the code that initiates unwinds and rewinds
// and stops them. If it did, the unwind would not stop until you left the wasm
// module entirely, etc. Therefore we do not instrument a function if it has a
// call to the four asyncify_* methods. Note that you may need to disable
// inlining if that would cause code that does need to be instrumented show up
// in that runtime code.
//
// To use this API, call asyncify_start_unwind when you want to. The call
// stack will then be unwound, and so execution will resume in the JS or
// other host environment on the outside that called into wasm. When you
// return there after unwinding, call asyncify_stop_unwind. Then when
// you want to rewind, call asyncify_start_rewind, and then call the same
// initial function you called before, so that rewinding can begin. The
// rewinding will reach the same function from which you started, since
// we are recreating the call stack. At that point you should call
// asyncify_stop_rewind and then execution can resume normally.
//
// Note that the asyncify_* API calls will verify that the data structure
// is valid, checking if the current location is past the end. If so, they
// will throw a wasm unreachable. No check is done during unwinding or
// rewinding, to keep things fast - in principle, from when unwinding begins
// and until it ends there should be no memory accesses aside from the
// asyncify code itself (and likewise when rewinding), so there should be
// no chance of memory corruption causing odd errors. However, the low
// overhead of this approach does cause an error only to be shown when
// unwinding/rewinding finishes, and not at the specific spot where the
// stack end was exceeded.
//
// By default this pass is very careful: it assumes that any import and
// any indirect call may start an unwind/rewind operation. If you know more
// specific information you can inform asyncify about that, which can reduce
// a great deal of overhead, as it can instrument less code. The relevant
// options to wasm-opt etc. are:
//
// --pass-arg=asyncify-imports@module1.base1,module2.base2,module3.base3
//
// Each module.base in that comma-separated list will be considered to
// be an import that can unwind/rewind, and all others are assumed not to
// (aside from the asyncify.* imports, which are always assumed to).
// Each entry can end in a '*' in which case it is matched as a prefix.
//
// The list of imports can be a response file (which is convenient if it
// is long, or you don't want to bother escaping it on the commandline
// etc.), e.g. --pass-arg=asyncify-imports@@file.txt will load the
// contents of file.txt (note the double "@@" - the first is the
// separator for --pass-arg, and the second is the usual convention for
// indicating a response file).
//
// --pass-arg=asyncify-ignore-imports
//
// Ignore all imports (except for asyncify.*), that is, assume none of them
// can start an unwind/rewind. (This is effectively the same as providing
// asyncify-imports with a list of non-existent imports.)
//
// --pass-arg=asyncify-ignore-indirect
//
// Ignore all indirect calls. This implies that you know the call stack
// will never need to be unwound with an indirect call somewhere in it.
// If that is true for your codebase, then this can be extremely useful
// as otherwise it looks like any indirect call can go to a lot of places.
//
// --pass-arg=asyncify-asserts
//
// This enables extra asserts in the output, like checking if we put in
// an unwind/rewind in an invalid place (this can be helpful for manual
// tweaking of the only-list / remove-list, see later).
//
// --pass-arg=asyncify-verbose
//
// Logs out instrumentation decisions to the console. This can help figure
// out why a certain function was instrumented.
//
// --pass-arg=asyncify-memory@memory
// Picks which exported memory of the module to store and load data from
// and to (useful if the module contains multiple memories).
//
// For manual fine-tuning of the list of instrumented functions, there are lists
// that you can set. These must be used carefully, as misuse can break your
// application - for example, if a function is called that should be
// instrumented but isn't because of these options, then bad things can happen.
// Note that even if you test this locally then users running your code in
// production may reach other code paths. Use these carefully!
//
// Each of the lists can be used with a response file (@filename, which is then
// loaded from the file). You can also use '*' wildcard matching in the lists.
//
// --pass-arg=asyncify-removelist@name1,name2,name3
//
// If the "remove-list" is provided, then the functions in it will be
// *removed* from the list of instrumented functions. That is, they won't
// be instrumented even if it looks like they need to be. This can be
// useful if you know things the safe whole-program analysis doesn't, e.g.
// that certain code paths will not be taken, where certain indirect calls
// will go, etc.
//
// --pass-arg=asyncify-addlist@name1,name2,name3
//
// If the "add-list" is provided, then the functions in the list will be
// *added* to the list of instrumented functions, that is, they will be
// instrumented even if otherwise we think they don't need to be. As by
// default everything will be instrumented in the safest way possible,
// this is only useful if you use ignore-indirect and use this to fix up
// some indirect calls that *do* need to be instrumented, or if you will
// do some later transform of the code that adds more call paths, etc.
//
// --pass-arg=asyncify-propagate-addlist
//
// The default behaviour of the addlist does not propagate instrumentation
// status. If this option is set then functions which call a function in
// the addlist will also be instrumented, and those that call them and so
// on.
//
// --pass-arg=asyncify-onlylist@name1,name2,name3
//
// If the "only-list" is provided, then *only* the functions in the list
// will be instrumented, and nothing else.
//
// Note that there are two types of instrumentation that happen for each
// function: if foo() can be part of a pause/resume operation, then we need to
// instrument code inside it to support pausing and resuming, but also, we need
// callers of the function to instrument calls to it. Normally this is already
// taken care of as the callers need to be instrumented as well anyhow. However,
// the ignore-indirect option makes things more complicated, since we can't tell
// where all the calls to foo() are - all we see are indirect calls that do not
// refer to foo() by name. To make it possible for you to use the add-list or
// only-list with ignore-indirect, those lists affect *both* kinds of
// instrumentation. That is, if parent() calls foo() indirectly, and you add
// parent() to the add-list, then the indirect calls in parent() will be
// instrumented to support pausing/resuming, even if ignore-indirect is set.
// Typically you don't need to think about this, and just add all the functions
// that can be on the stack while pausing - what this means is that when you do
// so, indirect calls will just work. (The cost is that an instrumented function
// will check for pausing at all indirect calls, which may be unnecessary in
// some cases; but this is an instrumented function anyhow, and indirect calls
// are slower anyhow, so this simple model seems good enough - a more complex
// model where you can specify "instrument, but not indirect calls from me"
// would likely have little benefit.)
//
// TODO When wasm has GC, extending the live ranges of locals can keep things
// alive unnecessarily. We may want to set locals to null at the end
// of their original range.
//
#include "asmjs/shared-constants.h"
#include "cfg/liveness-traversal.h"
#include "ir/effects.h"
#include "ir/find_all.h"
#include "ir/linear-execution.h"
#include "ir/literal-utils.h"
#include "ir/memory-utils.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/utils.h"
#include "pass.h"
#include "passes/pass-utils.h"
#include "support/file.h"
#include "support/string.h"
#include "wasm-builder.h"
#include "wasm.h"
namespace wasm {
namespace {
// Names of the globals, runtime-control functions, and intrinsics that this
// pass reads and creates (see the overview comment at the top of the file).
static const Name ASYNCIFY_STATE = "__asyncify_state";
static const Name ASYNCIFY_GET_STATE = "asyncify_get_state";
static const Name ASYNCIFY_DATA = "__asyncify_data";
static const Name ASYNCIFY_START_UNWIND = "asyncify_start_unwind";
static const Name ASYNCIFY_STOP_UNWIND = "asyncify_stop_unwind";
static const Name ASYNCIFY_START_REWIND = "asyncify_start_rewind";
static const Name ASYNCIFY_STOP_REWIND = "asyncify_stop_rewind";
static const Name ASYNCIFY_UNWIND = "__asyncify_unwind";
// Module/base names of the optional user-provided asyncify imports, which are
// renamed to the internal convention above and later replaced by real
// implementations.
static const Name ASYNCIFY = "asyncify";
static const Name START_UNWIND = "start_unwind";
static const Name STOP_UNWIND = "stop_unwind";
static const Name START_REWIND = "start_rewind";
static const Name STOP_REWIND = "stop_rewind";
// Temporary intrinsics used during instrumentation (implemented at the very
// end of the pass).
static const Name ASYNCIFY_GET_CALL_INDEX = "__asyncify_get_call_index";
static const Name ASYNCIFY_CHECK_CALL_INDEX = "__asyncify_check_call_index";
// TODO: having just normal/unwind_or_rewind would decrease code
//       size, but make debugging harder
// Values of the __asyncify_state global (see overview comment above).
enum class State { Normal = 0, Unwinding = 1, Rewinding = 2 };
// Byte offsets into the __asyncify_data structure: the current stack position
// is at offset 0; the stack end is at offset 4 (wasm32) or 8 (wasm64).
enum class DataOffset { BStackPos = 0, BStackEnd = 4, BStackEnd64 = 8 };
// Alignment used for values on the asyncify stack.
const auto STACK_ALIGN = 4;
// A helper class for managing fake global names. Creates the globals and
// provides mappings for using them.
// Fake globals are used to stash and then use return values from calls. We need
// to store them somewhere that is valid Binaryen IR, but also will be ignored
// by the Asyncify instrumentation, so we don't want to use a local. What we do
// is replace the local used to receive a call's result with a fake global.set
// to stash it, then do a fake global.get to receive it afterwards. (We do it in
// two steps so that if we are async, we only do the first and not the second,
// i.e., we don't store to the target local if not running normally).
class FakeGlobalHelper {
  Module& module;

public:
  // Create one mutable, zero-initialized fake global for every concrete type
  // returned by a call anywhere in the module.
  FakeGlobalHelper(Module& module) : module(module) {
    Builder builder(module);
    const std::string prefix = "asyncify_fake_call_global_";
    for (auto ty : collectTypes()) {
      auto globalName = prefix + Type(ty).toString();
      map[ty] = globalName;
      rev[globalName] = ty;
      module.addGlobal(builder.makeGlobal(
        globalName, ty, LiteralUtils::makeZero(ty, module), Builder::Mutable));
    }
  }

  // The fake globals are temporary scaffolding; remove them on teardown so
  // they do not remain in the final module.
  ~FakeGlobalHelper() {
    for (auto& [type, name] : map) {
      module.removeGlobal(name);
    }
  }

  // The fake global that stashes values of the given type.
  Name getName(Type type) { return map.at(type); }

  // Map a global name back to its type, or Type::none if it is not one of our
  // fake globals.
  Type getTypeOrNone(Name name) {
    auto iter = rev.find(name);
    return iter == rev.end() ? Type::none : iter->second;
  }

private:
  std::unordered_map<Type, Name> map;
  std::unordered_map<Name, Type> rev;

  // Collect the types returned from all calls for which call support globals
  // may need to be generated.
  using Types = std::unordered_set<Type>;

  Types collectTypes() {
    // Gather per-function call result types in parallel, then merge.
    ModuleUtils::ParallelFunctionAnalysis<Types> analysis(
      module, [&](Function* func, Types& types) {
        if (!func->body) {
          return;
        }
        struct TypeCollector : PostWalker<TypeCollector> {
          Types& types;
          TypeCollector(Types& types) : types(types) {}
          void visitCall(Call* curr) {
            if (curr->type.isConcrete()) {
              types.insert(curr->type);
            }
          }
          void visitCallIndirect(CallIndirect* curr) {
            if (curr->type.isConcrete()) {
              types.insert(curr->type);
            }
          }
        };
        TypeCollector(types).walk(func->body);
      });
    Types merged;
    for (auto& [_, funcTypes] : analysis.map) {
      merged.insert(funcTypes.begin(), funcTypes.end());
    }
    return merged;
  }
};
class PatternMatcher {
public:
  std::string designation;
  std::set<Name> names;
  std::set<std::string> patterns;
  std::set<std::string> patternsMatched;
  std::map<std::string, std::string> unescaped;

  // Build a matcher from a user-provided list of function names, where an
  // entry containing '*' is treated as a wildcard pattern.
  PatternMatcher(std::string designation,
                 Module& module,
                 const String::Split& list)
    : designation(designation) {
    // The lists contain human-readable strings. Turn them into the
    // internal escaped names for later comparisons
    for (auto& entry : list) {
      auto escaped = WasmBinaryReader::escape(entry);
      unescaped[escaped.toString()] = entry;
      if (entry.find('*') != std::string::npos) {
        patterns.insert(escaped.toString());
        continue;
      }
      // A plain name: warn if it does not exist, and reject imports (those
      // belong in the import list instead).
      auto* func = module.getFunctionOrNull(escaped);
      if (!func) {
        std::cerr << "warning: Asyncify " << designation
                  << "list contained a non-existing function name: " << entry
                  << " (" << escaped << ")\n";
      } else if (func->imported()) {
        Fatal() << "Asyncify " << designation
                << "list contained an imported function name (use the import "
                   "list for imports): "
                << entry << '\n';
      }
      names.insert(escaped.str);
    }
  }

  // Whether the given function name is in the list, either as an exact name
  // or by matching one of the wildcard patterns.
  bool match(Name funcName) {
    if (names.count(funcName) > 0) {
      return true;
    }
    for (auto& pattern : patterns) {
      if (String::wildcardMatch(pattern, funcName.toString())) {
        patternsMatched.insert(pattern);
        return true;
      }
    }
    return false;
  }

  // Warn about wildcard patterns that never matched anything.
  void checkPatternsMatches() {
    for (auto& pattern : patterns) {
      if (patternsMatched.count(pattern) > 0) {
        continue;
      }
      std::cerr << "warning: Asyncify " << designation
                << "list contained a non-matching pattern: "
                << unescaped[pattern] << " (" << pattern << ")\n";
    }
  }
};
// Analyze the entire module to see which calls may change the state (that
// is, start an unwind or rewind), either in themselves or in something
// called by them.
// Handles global module management, needed from the various parts of this
// transformation.
class ModuleAnalyzer {
  Module& module;
  bool canIndirectChangeState;

  struct Info
    : public ModuleUtils::CallGraphPropertyAnalysis<Info>::FunctionInfo {
    // The function name.
    Name name;
    // If this function can start an unwind/rewind. We only set this in cases
    // where we need to know that fact and also that we need to instrument code
    // to handle it (as a result, we do not set it for the bottommost runtime,
    // which needs no instrumentation).
    bool canChangeState = false;
    // If this function is part of the runtime that receives an unwinding
    // and starts a rewinding. If so, we do not instrument it, see above.
    // This is only relevant when handling things entirely inside wasm,
    // as opposed to imports.
    bool isBottomMostRuntime = false;
    // If this function is part of the runtime that starts an unwinding
    // and stops a rewinding. If so, we do not instrument it, see above.
    // The difference between the top-most and bottom-most runtime is that
    // the top-most part is still marked as changing the state; so things
    // that call it are instrumented. This is not done for the bottom.
    bool isTopMostRuntime = false;
    bool inRemoveList = false;
    bool addedFromList = false;
  };

  using Map = std::map<Function*, Info>;
  Map map;

public:
  // Runs the whole-module analysis: scans which functions can change the
  // asyncify state, applies the user-provided remove/add/only lists, and
  // propagates the "can change state" property back through the call graph.
  ModuleAnalyzer(Module& module,
                 std::function<bool(Name, Name)> canImportChangeState,
                 bool canIndirectChangeState,
                 const String::Split& removeListInput,
                 const String::Split& addListInput,
                 bool propagateAddList,
                 const String::Split& onlyListInput,
                 bool verbose)
    : module(module), canIndirectChangeState(canIndirectChangeState),
      fakeGlobals(module), verbose(verbose) {
    PatternMatcher removeList("remove", module, removeListInput);
    PatternMatcher addList("add", module, addListInput);
    PatternMatcher onlyList("only", module, onlyListInput);
    // Rename the asyncify imports so their internal name matches the
    // convention. This makes replacing them with the implementations
    // later easier.
    std::map<Name, Name> renamings;
    for (auto& func : module.functions) {
      if (func->module == ASYNCIFY) {
        if (func->base == START_UNWIND) {
          renamings[func->name] = ASYNCIFY_START_UNWIND;
        } else if (func->base == STOP_UNWIND) {
          renamings[func->name] = ASYNCIFY_STOP_UNWIND;
        } else if (func->base == START_REWIND) {
          renamings[func->name] = ASYNCIFY_START_REWIND;
        } else if (func->base == STOP_REWIND) {
          renamings[func->name] = ASYNCIFY_STOP_REWIND;
        } else {
          // Fixed typo in the error message ("unidenfied").
          Fatal() << "call to unidentified asyncify import: " << func->base;
        }
      }
    }
    ModuleUtils::renameFunctions(module, renamings);
    // Scan to see which functions can directly change the state.
    // Also handle the asyncify imports, removing them (as we will implement
    // them later), and replace calls to them with calls to the later proper
    // name.
    ModuleUtils::CallGraphPropertyAnalysis<Info> scanner(
      module, [&](Function* func, Info& info) {
        info.name = func->name;
        if (func->imported()) {
          // The relevant asyncify imports can definitely change the state.
          if (func->module == ASYNCIFY &&
              (func->base == START_UNWIND || func->base == STOP_REWIND)) {
            info.canChangeState = true;
          } else {
            info.canChangeState =
              canImportChangeState(func->module, func->base);
            if (verbose && info.canChangeState) {
              std::cout << "[asyncify] " << func->name
                        << " is an import that can change the state\n";
            }
          }
          return;
        }
        struct Walker : PostWalker<Walker> {
          Info& info;
          Module& module;
          bool canIndirectChangeState;
          Walker(Info& info, Module& module, bool canIndirectChangeState)
            : info(info), module(module),
              canIndirectChangeState(canIndirectChangeState) {}
          void visitCall(Call* curr) {
            if (curr->isReturn) {
              Fatal() << "tail calls not yet supported in asyncify";
            }
            auto* target = module.getFunction(curr->target);
            if (target->imported() && target->module == ASYNCIFY) {
              // Redirect the imports to the functions we'll add later.
              if (target->base == START_UNWIND) {
                info.canChangeState = true;
                info.isTopMostRuntime = true;
              } else if (target->base == STOP_UNWIND) {
                info.isBottomMostRuntime = true;
              } else if (target->base == START_REWIND) {
                info.isBottomMostRuntime = true;
              } else if (target->base == STOP_REWIND) {
                info.canChangeState = true;
                info.isTopMostRuntime = true;
              } else {
                // Fixed typo in the message ("unidenfied").
                WASM_UNREACHABLE("call to unidentified asyncify import");
              }
            }
          }
          void visitCallIndirect(CallIndirect* curr) {
            if (curr->isReturn) {
              Fatal() << "tail calls not yet supported in asyncify";
            }
            if (canIndirectChangeState) {
              info.canChangeState = true;
            }
            // TODO optimize the other case, at least by type
          }
        };
        Walker walker(info, module, canIndirectChangeState);
        walker.walk(func->body);
        if (info.isBottomMostRuntime) {
          info.canChangeState = false;
          // TODO: issue warnings on suspicious things, like a function in
          //       the bottom-most runtime also doing top-most runtime stuff
          //       like starting and unwinding.
        }
        if (verbose && info.canChangeState) {
          std::cout << "[asyncify] " << func->name
                    << " can change the state due to initial scan\n";
        }
      });
    // Functions in the remove-list are assumed to not change the state.
    for (auto& [func, info] : scanner.map) {
      if (removeList.match(func->name)) {
        info.inRemoveList = true;
        if (verbose && info.canChangeState) {
          std::cout << "[asyncify] " << func->name
                    << " is in the remove-list, ignore\n";
        }
        info.canChangeState = false;
      }
    }
    // Remove the asyncify imports, if any, and any calls to them.
    std::vector<Name> funcsToDelete;
    for (auto& [func, info] : scanner.map) {
      auto& callsTo = info.callsTo;
      if (func->imported() && func->module == ASYNCIFY) {
        funcsToDelete.push_back(func->name);
      }
      std::vector<Function*> callersToDelete;
      for (auto* target : callsTo) {
        if (target->imported() && target->module == ASYNCIFY) {
          callersToDelete.push_back(target);
        }
      }
      for (auto* target : callersToDelete) {
        callsTo.erase(target);
      }
    }
    for (auto name : funcsToDelete) {
      module.removeFunction(name);
    }
    // Apply the add-list: mark listed (non-imported) functions as changing the
    // state. It is an error for a function to be in both the add-list and the
    // remove-list.
    auto handleAddList = [&](ModuleAnalyzer::Map& map) {
      if (!addListInput.empty()) {
        for (auto& func : module.functions) {
          if (addList.match(func->name) && removeList.match(func->name)) {
            Fatal() << func->name
                    << " is found in the add-list and in the remove-list";
          }
          if (!func->imported() && addList.match(func->name)) {
            auto& info = map[func.get()];
            if (verbose && !info.canChangeState) {
              std::cout << "[asyncify] " << func->name
                        << " is in the add-list, add\n";
            }
            info.canChangeState = true;
            info.addedFromList = true;
          }
        }
      }
    };
    // When propagateAddList is enabled, we should check a add-list before
    // scannerpropagateBack so that callers of functions in add-list should also
    // be instrumented.
    if (propagateAddList) {
      handleAddList(scanner.map);
    }
    // The order of propagation in |propagateBack| is non-deterministic, so sort
    // the loggings we intend to do.
    std::vector<std::string> loggings;
    scanner.propagateBack([](const Info& info) { return info.canChangeState; },
                          [](const Info& info) {
                            return !info.isBottomMostRuntime &&
                                   !info.inRemoveList;
                          },
                          [](Info& info) { info.canChangeState = true; },
                          [&](const Info& info, Function* reason) {
                            if (verbose) {
                              std::stringstream str;
                              str << "[asyncify] " << info.name
                                  << " can change the state due to "
                                  << reason->name << "\n";
                              loggings.push_back(str.str());
                            }
                          },
                          scanner.IgnoreNonDirectCalls);
    if (!loggings.empty()) {
      std::sort(loggings.begin(), loggings.end());
      for (auto& logging : loggings) {
        std::cout << logging;
      }
    }
    map.swap(scanner.map);
    if (!onlyListInput.empty()) {
      // Only the functions in the only-list can change the state.
      for (auto& func : module.functions) {
        if (!func->imported()) {
          auto& info = map[func.get()];
          bool matched = onlyList.match(func->name);
          info.canChangeState = matched;
          if (matched) {
            info.addedFromList = true;
          }
          if (verbose) {
            std::cout << "[asyncify] " << func->name
                      << "'s state is set based on the only-list to " << matched
                      << '\n';
          }
        }
      }
    }
    // When propagateAddList is disabled, which is default behavior,
    // functions in add-list are just prepended to instrumented functions.
    if (!propagateAddList) {
      handleAddList(map);
    }
    removeList.checkPatternsMatches();
    addList.checkPatternsMatches();
    onlyList.checkPatternsMatches();
  }

  // Whether this function's body must be instrumented: it can change the
  // state, and it is not the top-most runtime (which is never instrumented).
  bool needsInstrumentation(Function* func) {
    auto& info = map[func];
    return info.canChangeState && !info.isTopMostRuntime;
  }

  // Whether the given expression (inside |func|) may change the state, by
  // calling something that can.
  bool canChangeState(Expression* curr, Function* func) {
    // Look inside to see if we call any of the things we know can change the
    // state.
    // TODO: caching, this is O(N^2)
    struct Walker : PostWalker<Walker> {
      void visitCall(Call* curr) {
        // We only implement these at the very end, but we know that they
        // definitely change the state.
        if (curr->target == ASYNCIFY_START_UNWIND ||
            curr->target == ASYNCIFY_STOP_REWIND ||
            curr->target == ASYNCIFY_GET_CALL_INDEX ||
            curr->target == ASYNCIFY_CHECK_CALL_INDEX) {
          canChangeState = true;
          return;
        }
        if (curr->target == ASYNCIFY_STOP_UNWIND ||
            curr->target == ASYNCIFY_START_REWIND) {
          isBottomMostRuntime = true;
          return;
        }
        // The target may not exist if it is one of our temporary intrinsics.
        auto* target = module->getFunctionOrNull(curr->target);
        if (target && (*map)[target].canChangeState) {
          canChangeState = true;
        }
      }
      void visitCallIndirect(CallIndirect* curr) { hasIndirectCall = true; }
      Module* module;
      Map* map;
      bool hasIndirectCall = false;
      bool canChangeState = false;
      bool isBottomMostRuntime = false;
    };
    Walker walker;
    walker.module = &module;
    walker.map = &map;
    walker.walk(curr);
    // An indirect call is normally ignored if we are ignoring indirect calls.
    // However, see the docs at the top: if the function we are inside was
    // specifically added by the user (in the only-list or the add-list) then we
    // instrument indirect calls from it (this allows specifically allowing some
    // indirect calls but not others).
    if (walker.hasIndirectCall &&
        (canIndirectChangeState || map[func].addedFromList)) {
      walker.canChangeState = true;
    }
    // The bottom-most runtime can never change the state.
    return walker.canChangeState && !walker.isBottomMostRuntime;
  }

  FakeGlobalHelper fakeGlobals;
  bool verbose;
};
// Checks if something performs a call: either a direct or indirect call,
// and perhaps it is dropped or assigned to a local. This captures all the
// cases of a call in flat IR.
static bool doesCall(Expression* curr) {
  // In flat IR a call result may be wrapped in a local.set or a drop; peel
  // that off before checking.
  Expression* inner = curr;
  if (auto* set = inner->dynCast<LocalSet>()) {
    inner = set->value;
  } else if (auto* drop = inner->dynCast<Drop>()) {
    inner = drop->value;
  }
  return inner->is<Call>() || inner->is<CallIndirect>();
}
class AsyncifyBuilder : public Builder {
public:
  Module& wasm;
  Type pointerType;
  Name asyncifyMemory;

  AsyncifyBuilder(Module& wasm, Type pointerType, Name asyncifyMemory)
    : Builder(wasm), wasm(wasm), pointerType(pointerType),
      asyncifyMemory(asyncifyMemory) {}

  // Loads the current asyncify stack position from the structure that
  // __asyncify_data points to.
  Expression* makeGetStackPos() {
    auto ptrSize = pointerType.getByteSize();
    return makeLoad(ptrSize,
                    false,
                    int(DataOffset::BStackPos),
                    ptrSize,
                    makeGlobalGet(ASYNCIFY_DATA, pointerType),
                    pointerType,
                    asyncifyMemory);
  }

  // Adjusts the stack position by |by| bytes (negative values move it down).
  // A zero adjustment produces a nop.
  Expression* makeIncStackPos(int32_t by) {
    if (by == 0) {
      return makeNop();
    }
    auto ptrSize = pointerType.getByteSize();
    auto* newPos =
      makeBinary(Abstract::getBinary(pointerType, Abstract::Add),
                 makeGetStackPos(),
                 makeConst(Literal::makeFromInt64(by, pointerType)));
    return makeStore(ptrSize,
                     int(DataOffset::BStackPos),
                     ptrSize,
                     makeGlobalGet(ASYNCIFY_DATA, pointerType),
                     newPos,
                     pointerType,
                     asyncifyMemory);
  }

  // Compares the __asyncify_state global against the given state value.
  Expression* makeStateCheck(State value) {
    return makeBinary(EqInt32,
                      makeGlobalGet(ASYNCIFY_STATE, Type::i32),
                      makeConst(Literal(int32_t(value))));
  }
};
// Instrument control flow, around calls and adding skips for rewinding.
//
// This runs on flat IR (the "flatten" pass runs just before it), and rewrites
// each instrumented function so that, while rewinding, execution "skips
// forward" through the body until it reaches the call we must re-enter.
struct AsyncifyFlow : public Pass {
  bool isFunctionParallel() override { return true; }

  // Shared analysis of which functions/expressions can change the state.
  // Owned by the top-level Asyncify pass; non-owning pointer here.
  ModuleAnalyzer* analyzer;
  Type pointerType;
  Name asyncifyMemory;

  std::unique_ptr<Pass> create() override {
    return std::make_unique<AsyncifyFlow>(analyzer, pointerType, asyncifyMemory);
  }

  AsyncifyFlow(ModuleAnalyzer* analyzer, Type pointerType, Name asyncifyMemory)
    : analyzer(analyzer), pointerType(pointerType),
      asyncifyMemory(asyncifyMemory) {}

  void runOnFunction(Module* module_, Function* func_) override {
    module = module_;
    func = func_;
    builder =
      std::make_unique<AsyncifyBuilder>(*module, pointerType, asyncifyMemory);
    // If the function cannot change our state, we have nothing to do -
    // we will never unwind or rewind the stack here.
    if (!analyzer->needsInstrumentation(func)) {
      return;
    }
    // Rewrite the function body.
    // Each function we enter will pop one from the stack, which is the index
    // of the next call to make.
    auto* block = builder->makeBlock(
      {builder->makeIf(builder->makeStateCheck(
                         State::Rewinding), // TODO: such checks can be !normal
                       makeCallIndexPop()),
       process(func->body)});
    if (func->getResults() != Type::none) {
      // Rewriting control flow may alter things; make sure the function ends in
      // something valid (which the optimizer can remove later).
      block->list.push_back(builder->makeUnreachable());
    }
    block->finalize();
    func->body = block;
    // Making things like returns conditional may alter types.
    ReFinalize().walkFunctionInModule(func, module);
  }

private:
  std::unique_ptr<AsyncifyBuilder> builder;
  Module* module;
  Function* func;

  // Each call in the function has an index, noted during unwind and checked
  // during rewind.
  Index callIndex = 0;

  // Recursively (via an explicit work stack) rewrites an expression tree,
  // returning the instrumented replacement for |curr|.
  Expression* process(Expression* curr) {
    // The IR is in flat form, which makes this much simpler: there are no
    // unnecessarily nested side effects or control flow, so we can add
    // skips for rewinding in an easy manner, putting a single if around
    // whole chunks of code. Also calls are separated out so that it is easy
    // to add the necessary checks for them for unwinding/rewinding support.
    //
    // Aside from that, we must "linearize" all control flow so that we can
    // reach the right part when rewinding, which is done by always skipping
    // forward. For example, for an if we do this:
    //
    //    if (cond) { // cond is either a const or a local.get in a flat IR
    //      side1();
    //    } else {
    //      side2();
    //    }
    // =>
    //    if (rewinding || cond) {
    //      new_side1();
    //    }
    //    if (rewinding || !cond) {
    //      new_side2();
    //    }
    // where new_sideN is
    //    if (normal || check_call_index(callIndex)) {
    //      sideN();
    //    }
    // when sideN can change the state, and
    //    if (normal) {
    //      sideN();
    //    }
    // when it does not. (See makeCallSupport() for details.)
    //
    // This way we will linearly get through all the code in the function,
    // if we are rewinding. In a similar way we skip over breaks, etc.; just
    // "keep on truckin'".
    //
    // Note that temp locals added in this way are just normal locals, and in
    // particular they are saved and loaded. That way if we resume from the
    // first if arm we will avoid the second.
    //
    // To avoid recursion, we use stacks here. We process the work for each
    // node in two phases as follows:
    //
    // 1. The "Scan" phase finds children we need to process (ones that may
    //    change the state), adds Scan tasks for them to the work stack, and
    //    process call instructions.
    // 2. The "Finish" phase runs after all children have been Scanned and
    //    Finished. It pops the children's results from the results stack (if
    //    there were relevant children), and then it pushes its own result.
    //
    struct Work {
      Expression* curr;
      enum { Scan, Finish } phase;
    };

    std::vector<Work> work;
    std::vector<Expression*> results;
    // Tracks which expressions were pushed as work items, so that during
    // Finish we can tell which block children have a pending result.
    std::unordered_set<Expression*> processed;

    work.push_back(Work{curr, Work::Scan});

    while (!work.empty()) {
      auto item = work.back();
      work.pop_back();
      processed.insert(item.curr);
      auto* curr = item.curr;
      auto phase = item.phase;
      // Nothing in this subtree can change the state: just skip it entirely
      // while rewinding.
      if (phase == Work::Scan && !analyzer->canChangeState(curr, func)) {
        results.push_back(makeMaybeSkip(curr));
        continue;
      }
      if (auto* block = curr->dynCast<Block>()) {
        auto& list = block->list;

        // Find the children we need to process. They will be Scanned and
        // Finished before we reach our own Finish phase.
        if (phase == Work::Scan) {
          work.push_back(Work{curr, Work::Finish});
          // Add Scan tasks in reverse order, so that we process them in
          // execution order.
          for (size_t i = list.size(); i > 0; i--) {
            auto* child = list[i - 1];
            if (analyzer->canChangeState(child, func)) {
              work.push_back(Work{child, Work::Scan});
            }
          }
          continue;
        }
        // Finish phase: walk the list backwards, splicing processed results
        // back in, and clumping runs of unprocessed children under a single
        // maybe-skip if.
        Index i = list.size() - 1;

        // At least one of our children may change the state. Clump them as
        // necessary.
        while (1) {
          if (processed.count(list[i])) {
            // Results were pushed in execution order, so popping from the
            // back matches this backwards walk.
            list[i] = results.back();
            results.pop_back();
          } else {
            Index begin = i;
            while (begin > 0 && !processed.count(list[begin - 1])) {
              begin--;
            }
            // We have a range of [begin, i] in which the state cannot change,
            // so all we need to do is skip it if rewinding.
            if (begin == i) {
              list[i] = makeMaybeSkip(list[i]);
            } else {
              auto* block = builder->makeBlock();
              for (auto j = begin; j <= i; j++) {
                block->list.push_back(list[j]);
              }
              block->finalize();
              list[begin] = makeMaybeSkip(block);
              // The clumped items moved into the inner block; nop out their
              // old slots.
              for (auto j = begin + 1; j <= i; j++) {
                list[j] = builder->makeNop();
              }
            }
            i = begin;
          }
          if (i == 0) {
            break;
          } else {
            i--;
          }
        }
        results.push_back(block);
        continue;
      } else if (auto* iff = curr->dynCast<If>()) {
        // The state change cannot be in the condition due to flat form, so it
        // must be in one of the children.
        assert(!analyzer->canChangeState(iff->condition, func));
        if (item.phase == Work::Scan) {
          work.push_back(Work{curr, Work::Finish});
          // Add ifTrue later so that we process it first.
          if (iff->ifFalse) {
            work.push_back(Work{iff->ifFalse, Work::Scan});
          }
          work.push_back(Work{iff->ifTrue, Work::Scan});
          continue;
        }
        // We must linearize this, which means we pass through both arms if we
        // are rewinding.
        if (!iff->ifFalse) {
          // One-armed if: just OR the rewinding check into the condition.
          iff->condition = builder->makeBinary(
            OrInt32, iff->condition, builder->makeStateCheck(State::Rewinding));
          iff->ifTrue = results.back();
          results.pop_back();
          iff->finalize();
          results.push_back(iff);
          continue;
        }
        // Two-armed if. ifTrue was Scanned first, so its result is deeper on
        // the stack: pop ifFalse, then ifTrue.
        auto* newIfFalse = results.back();
        results.pop_back();
        auto* newIfTrue = results.back();
        results.pop_back();
        // Save the condition in a temp, as both of the replacement ifs must
        // test the same value (and it may have been consumed by the first).
        auto conditionTemp = builder->addVar(func, Type::i32);
        // TODO: can avoid pre if the condition is a get or a const
        auto* pre =
          makeMaybeSkip(builder->makeLocalSet(conditionTemp, iff->condition));
        iff->condition = builder->makeLocalGet(conditionTemp, Type::i32);
        iff->condition = builder->makeBinary(
          OrInt32, iff->condition, builder->makeStateCheck(State::Rewinding));
        iff->ifTrue = newIfTrue;
        iff->ifFalse = nullptr;
        iff->finalize();
        // Add support for the second arm as well.
        auto* otherIf = builder->makeIf(
          builder->makeBinary(
            OrInt32,
            builder->makeUnary(EqZInt32,
                               builder->makeLocalGet(conditionTemp, Type::i32)),
            builder->makeStateCheck(State::Rewinding)),
          newIfFalse);
        otherIf->finalize();
        results.push_back(builder->makeBlock({pre, iff, otherIf}));
        continue;
      } else if (auto* loop = curr->dynCast<Loop>()) {
        if (item.phase == Work::Scan) {
          work.push_back(Work{curr, Work::Finish});
          work.push_back(Work{loop->body, Work::Scan});
          continue;
        }
        loop->body = results.back();
        results.pop_back();
        results.push_back(loop);
        continue;
      } else if (doesCall(curr)) {
        // We reach here only in Scan phase, but we in effect "Finish" calls
        // here as well.
        results.push_back(makeCallSupport(curr));
        continue;
      }
      // We must handle all control flow above, and all things that can change
      // the state, so there should be nothing that can reach here - add it
      // earlier as necessary.
      // std::cout << *curr << '\n';
      WASM_UNREACHABLE("unexpected expression type");
    }
    assert(results.size() == 1);
    return results.back();
  }

  // Possibly skip some code, if rewinding.
  Expression* makeMaybeSkip(Expression* curr) {
    return builder->makeIf(builder->makeStateCheck(State::Normal), curr);
  }

  // Wraps a call (or local.set/drop of a call) with the unwind/rewind
  // machinery: assign it a call index, only execute it when normal or when
  // rewinding into exactly this call, and emit the unwind check after it.
  Expression* makeCallSupport(Expression* curr) {
    // TODO: stop doing this after code can no longer reach a call that may
    //       change the state
    assert(doesCall(curr));
    assert(curr->type == Type::none);
    // The case of a set is tricky: we must *not* execute the set when
    // unwinding, since at that point we have a fake value for the return,
    // and if we applied it to the local, it would end up saved and then
    // potentially used later - and with coalescing, that may interfere
    // with other things. Note also that at this point in the process we
    // have not yet written the load saving/loading code, so the optimizer
    // doesn't see that locals will have another use at the beginning and
    // end of the function. We don't do that yet because we don't want it
    // to force the final number of locals to be too high, but we also
    // must be careful to never touch a local unnecessarily to avoid bugs.
    // To implement this, write to a fake global; we'll fix it up after
    // AsyncifyLocals locals adds local saving/restoring.
    auto* set = curr->dynCast<LocalSet>();
    if (set) {
      auto name = analyzer->fakeGlobals.getName(set->value->type);
      curr = builder->makeGlobalSet(name, set->value);
      set->value = builder->makeGlobalGet(name, set->value->type);
    }
    // Instrument the call itself (or, if a drop, the drop+call).
    auto index = callIndex++;
    // Execute the call, if either normal execution, or if rewinding and this
    // is the right call to go into.
    // TODO: we can read the next call index once in each function (but should
    //       avoid saving/restoring that local later)
    curr = builder->makeIf(
      builder->makeBinary(OrInt32,
                          builder->makeStateCheck(State::Normal),
                          makeCallIndexPeek(index)),
      builder->makeSequence(curr, makePossibleUnwind(index, set)));
    return curr;
  }

  Expression* makePossibleUnwind(Index index, Expression* ifNotUnwinding) {
    // In this pass we emit an "unwind" as a call to a fake intrinsic. We
    // will implement it in the later pass. (We can't create the unwind block
    // target here, as the optimizer would remove it later; we can only add
    // it when we add its contents, later.)
    return builder->makeIf(
      builder->makeStateCheck(State::Unwinding),
      builder->makeCall(
        ASYNCIFY_UNWIND, {builder->makeConst(int32_t(index))}, Type::none),
      ifNotUnwinding);
  }

  Expression* makeCallIndexPeek(Index index) {
    // Emit an intrinsic for this, as we store the index into a local, and
    // don't want it to be seen by asyncify itself.
    return builder->makeCall(ASYNCIFY_CHECK_CALL_INDEX,
                             {builder->makeConst(int32_t(index))},
                             Type::i32);
  }

  Expression* makeCallIndexPop() {
    // Emit an intrinsic for this, as we store the index into a local, and
    // don't want it to be seen by asyncify itself.
    return builder->makeCall(ASYNCIFY_GET_CALL_INDEX, {}, Type::none);
  }
};
// Add asserts in non-instrumented code.
//
// Only runs when the asyncify-asserts pass-arg is given; instruments every
// *uninstrumented* function to trap at runtime if the asyncify state changes
// across a call inside it (which would mean the analysis missed something, or
// a list was wrong).
struct AsyncifyAssertInNonInstrumented : public Pass {
  bool isFunctionParallel() override { return true; }

  // Non-owning; the analysis is owned by the top-level Asyncify pass.
  ModuleAnalyzer* analyzer;
  Type pointerType;
  Name asyncifyMemory;

  std::unique_ptr<Pass> create() override {
    return std::make_unique<AsyncifyAssertInNonInstrumented>(
      analyzer, pointerType, asyncifyMemory);
  }

  AsyncifyAssertInNonInstrumented(ModuleAnalyzer* analyzer,
                                  Type pointerType,
                                  Name asyncifyMemory)
    : analyzer(analyzer), pointerType(pointerType),
      asyncifyMemory(asyncifyMemory) {}

  void runOnFunction(Module* module_, Function* func) override {
    // FIXME: This looks like it was never right, as it should ignore the top-
    //        most runtime, but it will actually instrument it (as it needs no
    //        instrumentation, like random code - but the top-most runtime is
    //        actually a place that needs neither instrumentation *nor*
    //        assertions, as the assertions will error when it changes the
    //        state).
    if (!analyzer->needsInstrumentation(func)) {
      module = module_;
      builder =
        std::make_unique<AsyncifyBuilder>(*module, pointerType, asyncifyMemory);
      addAssertsInNonInstrumented(func);
    }
  }

  // Given a function that is not instrumented - because we proved it doesn't
  // need it, or depending on the only-list / remove-list - add assertions that
  // verify that property at runtime.
  // Note that it is ok to run code while sleeping (if you are careful not
  // to break assumptions in the program!) - so what is actually
  // checked here is if the state *changes* in an uninstrumented function.
  // That is, if in an uninstrumented function, a sleep should not begin
  // from any call.
  void addAssertsInNonInstrumented(Function* func) {
    // Snapshot the state on entry; every call site below compares against it.
    auto oldState = builder->addVar(func, Type::i32);
    // Add a check at the function entry.
    func->body = builder->makeSequence(
      builder->makeLocalSet(oldState,
                            builder->makeGlobalGet(ASYNCIFY_STATE, Type::i32)),
      func->body);
    // Add a check around every call.
    struct Walker : PostWalker<Walker> {
      void visitCall(Call* curr) {
        // Tail calls will need another type of check, as they wouldn't reach
        // this assertion.
        assert(!curr->isReturn);
        handleCall(curr);
      }
      void visitCallIndirect(CallIndirect* curr) {
        // Tail calls will need another type of check, as they wouldn't reach
        // this assertion.
        assert(!curr->isReturn);
        handleCall(curr);
      }
      // Replace |call| with (call; trap-if-state-changed), preserving the
      // call's value through a temp local if it returns one.
      void handleCall(Expression* call) {
        auto* check = builder->makeIf(
          builder->makeBinary(NeInt32,
                              builder->makeGlobalGet(ASYNCIFY_STATE, Type::i32),
                              builder->makeLocalGet(oldState, Type::i32)),
          builder->makeUnreachable());
        Expression* rep;
        if (call->type.isConcrete()) {
          auto temp = builder->addVar(func, call->type);
          rep = builder->makeBlock({
            builder->makeLocalSet(temp, call),
            check,
            builder->makeLocalGet(temp, call->type),
          });
        } else {
          rep = builder->makeSequence(call, check);
        }
        replaceCurrent(rep);
      }
      // Set up by the enclosing method before walking.
      Function* func;
      AsyncifyBuilder* builder;
      Index oldState;
    };
    Walker walker;
    walker.func = func;
    walker.builder = builder.get();
    walker.oldState = oldState;
    walker.walk(func->body);
  }

private:
  std::unique_ptr<AsyncifyBuilder> builder;
  Module* module;
};
// Instrument local saving/restoring.
//
// Runs after AsyncifyFlow: lowers the fake intrinsics that pass emitted
// (unwind, get/check call index, fake globals) and adds the prologue that
// reloads locals when rewinding plus the epilogue that saves them (and the
// call index) when unwinding.
struct AsyncifyLocals : public WalkerPass<PostWalker<AsyncifyLocals>> {
  bool isFunctionParallel() override { return true; }

  // Non-owning; the analysis is owned by the top-level Asyncify pass.
  ModuleAnalyzer* analyzer;
  Type pointerType;
  Name asyncifyMemory;

  std::unique_ptr<Pass> create() override {
    return std::make_unique<AsyncifyLocals>(
      analyzer, pointerType, asyncifyMemory);
  }

  AsyncifyLocals(ModuleAnalyzer* analyzer,
                 Type pointerType,
                 Name asyncifyMemory)
    : analyzer(analyzer), pointerType(pointerType),
      asyncifyMemory(asyncifyMemory) {}

  void visitCall(Call* curr) {
    // Replace calls to the fake intrinsics.
    if (curr->target == ASYNCIFY_UNWIND) {
      // An unwind becomes a break to the unwind block (added in
      // doWalkFunction), carrying the call index.
      replaceCurrent(builder->makeBreak(ASYNCIFY_UNWIND, curr->operands[0]));
    } else if (curr->target == ASYNCIFY_GET_CALL_INDEX) {
      // Pop the next call index from the stack into rewindIndex.
      replaceCurrent(builder->makeSequence(
        builder->makeIncStackPos(-4),
        builder->makeLocalSet(rewindIndex,
                              builder->makeLoad(4,
                                                false,
                                                0,
                                                4,
                                                builder->makeGetStackPos(),
                                                Type::i32,
                                                asyncifyMemory))));
    } else if (curr->target == ASYNCIFY_CHECK_CALL_INDEX) {
      // Compare the popped call index against this call's index.
      replaceCurrent(
        builder->makeBinary(EqInt32,
                            builder->makeLocalGet(rewindIndex, Type::i32),
                            curr->operands[0]));
    }
  }

  void visitGlobalSet(GlobalSet* curr) {
    // Lower a fake-global set (emitted by AsyncifyFlow for call results)
    // into a set of a plain local.
    auto type = analyzer->fakeGlobals.getTypeOrNone(curr->name);
    if (type != Type::none) {
      replaceCurrent(
        builder->makeLocalSet(getFakeCallLocal(type), curr->value));
    }
  }

  void visitGlobalGet(GlobalGet* curr) {
    // Lower a fake-global get into a get of the matching local.
    auto type = analyzer->fakeGlobals.getTypeOrNone(curr->name);
    if (type != Type::none) {
      replaceCurrent(builder->makeLocalGet(getFakeCallLocal(type), type));
    }
  }

  // Returns (lazily creating) the local used in place of the fake global of
  // the given type.
  Index getFakeCallLocal(Type type) {
    auto iter = fakeCallLocals.find(type);
    if (iter != fakeCallLocals.end()) {
      return iter->second;
    }
    return fakeCallLocals[type] = builder->addVar(getFunction(), type);
  }

  void doWalkFunction(Function* func) {
    // If the function cannot change our state, we have nothing to do -
    // we will never unwind or rewind the stack here.
    if (!analyzer->needsInstrumentation(func)) {
      return;
    }
    // Find the locals that we actually need to load and save: any local that is
    // alive at a relevant call site must be handled, but others can be ignored.
    findRelevantLiveLocals(func);
    // The new function body has a prelude to load locals if rewinding,
    // then the actual main body, which does all its unwindings by breaking
    // to the unwind block, which then handles pushing the call index, as
    // well as saving the locals.
    // An index is needed for getting the unwinding and rewinding call indexes
    // around TODO: can this be the same index?
    auto unwindIndex = builder->addVar(func, Type::i32);
    rewindIndex = builder->addVar(func, Type::i32);
    // Rewrite the function body.
    builder = std::make_unique<AsyncifyBuilder>(
      *getModule(), pointerType, asyncifyMemory);
    walk(func->body);
    // After the normal function body, emit a barrier before the postamble.
    Expression* barrier;
    if (func->getResults() == Type::none) {
      // The function may have ended without a return; ensure one.
      barrier = builder->makeReturn();
    } else {
      // The function must have returned or hit an unreachable, but emit one
      // to make possible bugs easier to figure out (as this should never be
      // reached). The optimizer can remove this anyhow.
      barrier = builder->makeUnreachable();
    }
    auto* newBody = builder->makeBlock(
      {builder->makeIf(builder->makeStateCheck(State::Rewinding),
                       makeLocalLoading()),
       builder->makeLocalSet(
         unwindIndex,
         builder->makeBlock(ASYNCIFY_UNWIND,
                            builder->makeSequence(func->body, barrier))),
       makeCallIndexPush(unwindIndex),
       makeLocalSaving()});
    if (func->getResults() != Type::none) {
      // If we unwind, we must still "return" a value, even if it will be
      // ignored on the outside.
      newBody->list.push_back(
        LiteralUtils::makeZero(func->getResults(), *getModule()));
      newBody->finalize(func->getResults());
    }
    func->body = newBody;
    // Making things like returns conditional may alter types.
    ReFinalize().walkFunctionInModule(func, getModule());
  }

private:
  std::unique_ptr<AsyncifyBuilder> builder;

  Index rewindIndex;
  std::unordered_map<Type, Index> fakeCallLocals;
  std::set<Index> relevantLiveLocals;

  // Populates relevantLiveLocals with the locals live at any possible
  // unwind/rewind point in the function.
  void findRelevantLiveLocals(Function* func) {
    struct RelevantLiveLocalsWalker
      : public LivenessWalker<RelevantLiveLocalsWalker,
                              Visitor<RelevantLiveLocalsWalker>> {
      // Basic blocks that have a possible unwind/rewind in them.
      std::set<BasicBlock*> relevantBasicBlocks;

      void visitCall(Call* curr) {
        if (!currBasicBlock) {
          return;
        }
        // Note blocks where we might unwind/rewind, all of which have a
        // possible call to ASYNCIFY_CHECK_CALL_INDEX emitted right before the
        // actual call.
        // Note that each relevant original call was turned into a sequence of
        // instructions, one of which is an if and then a call to this special
        // intrinsic. We rely on the fact that if a local was live at the
        // original call, it also would be in all that sequence of instructions,
        // and in particular at the call we look for here (which is right before
        // the call, and so anything that has its final use at the call is still
        // live here).
        if (curr->target == ASYNCIFY_CHECK_CALL_INDEX) {
          relevantBasicBlocks.insert(currBasicBlock);
        }
      }
    };

    RelevantLiveLocalsWalker walker;
    walker.setFunction(func);
    walker.walkFunctionInModule(func, getModule());
    // The relevant live locals are ones that are alive at an unwind/rewind
    // location. TODO look more precisely inside basic blocks, as one might stop
    //           being live in the middle
    for (auto* block : walker.liveBlocks) {
      if (walker.relevantBasicBlocks.count(block)) {
        for (auto local : block->contents.start) {
          relevantLiveLocals.insert(local);
        }
      }
    }
  }

  // Emits the rewind prologue: pop the saved locals off the asyncify stack
  // (in the same order/offsets makeLocalSaving uses) back into the locals.
  Expression* makeLocalLoading() {
    if (relevantLiveLocals.empty()) {
      return builder->makeNop();
    }
    auto* func = getFunction();
    auto numLocals = func->getNumLocals();
    // Total byte size of all relevant locals, so we can pop them all at once.
    Index total = 0;
    for (Index i = 0; i < numLocals; i++) {
      if (!relevantLiveLocals.count(i)) {
        continue;
      }
      total += getByteSize(func->getLocalType(i));
    }
    auto* block = builder->makeBlock();
    block->list.push_back(builder->makeIncStackPos(-total));
    auto tempIndex = builder->addVar(func, builder->pointerType);
    block->list.push_back(
      builder->makeLocalSet(tempIndex, builder->makeGetStackPos()));
    Index offset = 0;
    for (Index i = 0; i < numLocals; i++) {
      if (!relevantLiveLocals.count(i)) {
        continue;
      }
      auto localType = func->getLocalType(i);
      // A tuple local is stored as one lane per element.
      SmallVector<Expression*, 1> loads;
      for (const auto& type : localType) {
        auto size = getByteSize(type);
        assert(size % STACK_ALIGN == 0);
        // TODO: higher alignment?
        loads.push_back(builder->makeLoad(
          size,
          true,
          offset,
          STACK_ALIGN,
          builder->makeLocalGet(tempIndex, builder->pointerType),
          type,
          asyncifyMemory));
        offset += size;
      }
      Expression* load;
      if (loads.size() == 1) {
        load = loads[0];
      } else if (localType.size() > 1) {
        load = builder->makeTupleMake(std::move(loads));
      } else {
        WASM_UNREACHABLE("Unexpected empty type");
      }
      block->list.push_back(builder->makeLocalSet(i, load));
    }
    block->finalize();
    return block;
  }

  // Emits the unwind epilogue: push all relevant locals onto the asyncify
  // stack, using the same offsets as makeLocalLoading.
  Expression* makeLocalSaving() {
    if (relevantLiveLocals.empty()) {
      return builder->makeNop();
    }
    auto* func = getFunction();
    auto numLocals = func->getNumLocals();
    auto* block = builder->makeBlock();
    auto tempIndex = builder->addVar(func, builder->pointerType);
    block->list.push_back(
      builder->makeLocalSet(tempIndex, builder->makeGetStackPos()));
    Index offset = 0;
    for (Index i = 0; i < numLocals; i++) {
      if (!relevantLiveLocals.count(i)) {
        continue;
      }
      auto localType = func->getLocalType(i);
      size_t j = 0;
      for (const auto& type : localType) {
        auto size = getByteSize(type);
        Expression* localGet = builder->makeLocalGet(i, localType);
        if (localType.size() > 1) {
          localGet = builder->makeTupleExtract(localGet, j);
        }
        assert(size % STACK_ALIGN == 0);
        // TODO: higher alignment?
        block->list.push_back(builder->makeStore(
          size,
          offset,
          STACK_ALIGN,
          builder->makeLocalGet(tempIndex, builder->pointerType),
          localGet,
          type,
          asyncifyMemory));
        offset += size;
        ++j;
      }
    }
    // Bump the stack past everything we just wrote.
    block->list.push_back(builder->makeIncStackPos(offset));
    block->finalize();
    return block;
  }

  // Pushes the unwinding call index (held in the given temp local) onto the
  // asyncify stack.
  Expression* makeCallIndexPush(Index tempIndex) {
    // TODO: add a check against the stack end here
    return builder->makeSequence(
      builder->makeStore(4,
                         0,
                         4,
                         builder->makeGetStackPos(),
                         builder->makeLocalGet(tempIndex, Type::i32),
                         Type::i32,
                         asyncifyMemory),
      builder->makeIncStackPos(4));
  }

  // Byte size of a saved local's type; fatal on unsupported (e.g. reference)
  // types.
  unsigned getByteSize(Type type) {
    if (!type.hasByteSize()) {
      Fatal() << "Asyncify does not yet support non-number types, like "
                 "references (see "
                 "https://github.com/WebAssembly/binaryen/issues/3739)";
    }
    return type.getByteSize();
  }
};
} // anonymous namespace
// Joins an import's module and base names into the "module.base" form used
// for matching against the asyncify-imports list.
static std::string getFullImportName(Name module, Name base) {
  std::string full(module.str);
  full += '.';
  full += base.toString();
  return full;
}
// The top-level pass: parses the pass-args, analyzes the module, then runs
// the sub-passes (flatten + AsyncifyFlow, optional asserts, AsyncifyLocals)
// in the required order, and finally adds the runtime API functions.
struct Asyncify : public Pass {
  // Adds calls.
  bool addsEffects() override { return true; }

  void run(Module* module) override {
    bool optimize = getPassOptions().optimizeLevel > 0;

    // Find which things can change the state.
    auto stateChangingImports = String::trim(read_possible_response_file(
      getArgumentOrDefault("asyncify-imports", "")));
    auto ignoreImports = getArgumentOrDefault("asyncify-ignore-imports", "");
    // With no import lists given at all, conservatively assume any import
    // can change the state.
    bool allImportsCanChangeState =
      stateChangingImports == "" && ignoreImports == "";
    String::Split listedImports(stateChangingImports,
                                String::Split::NewLineOr(","));
    // canIndirectChangeState is the default. asyncify-ignore-indirect sets it
    // to false.
    auto canIndirectChangeState = !hasArgument("asyncify-ignore-indirect");
    std::string removeListInput =
      getArgumentOrDefault("asyncify-removelist", "");
    if (removeListInput.empty()) {
      // Support old name for now to avoid immediate breakage TODO remove
      removeListInput = getArgumentOrDefault("asyncify-blacklist", "");
    }
    String::Split removeList(
      String::trim(read_possible_response_file(removeListInput)),
      String::Split::NewLineOr(","));
    String::Split addList(String::trim(read_possible_response_file(
                            getArgumentOrDefault("asyncify-addlist", ""))),
                          String::Split::NewLineOr(","));
    std::string onlyListInput = getArgumentOrDefault("asyncify-onlylist", "");
    if (onlyListInput.empty()) {
      // Support old name for now to avoid immediate breakage TODO remove
      onlyListInput = getArgumentOrDefault("asyncify-whitelist", "");
    }
    String::Split onlyList(
      String::trim(read_possible_response_file(onlyListInput)),
      String::Split::NewLineOr(","));
    auto asserts = hasArgument("asyncify-asserts");
    auto verbose = hasArgument("asyncify-verbose");
    auto relocatable = hasArgument("asyncify-relocatable");
    auto secondaryMemory = hasArgument("asyncify-in-secondary-memory");
    auto propagateAddList = hasArgument("asyncify-propagate-addlist");

    // Ensure there is a memory, as we need it.
    if (secondaryMemory) {
      auto secondaryMemorySizeString =
        getArgumentOrDefault("asyncify-secondary-memory-size", "1");
      Address secondaryMemorySize = std::stoi(secondaryMemorySizeString);
      asyncifyMemory = createSecondaryMemory(module, secondaryMemorySize);
    } else {
      if (module->memories.size() <= 1) {
        MemoryUtils::ensureExists(module);
        asyncifyMemory = module->memories[0]->name;
      } else {
        // Multiple memories: asyncify-memory names the *export* of the memory
        // to use.
        auto asyncifyMemoryValue =
          getArgumentOrDefault("asyncify-memory", "memory");
        for (auto& theExport : module->exports) {
          if (theExport->kind == ExternalKind::Memory &&
              theExport->name == asyncifyMemoryValue) {
            asyncifyMemory = theExport->value;
            break;
          }
        }
        if (!asyncifyMemory) {
          Fatal() << "Please specify which of the multiple memories to use, "
                     "with --pass-arg=asyncify-memory@memory";
        }
      }
    }
    // Pointer width follows the chosen memory's addressing mode.
    pointerType =
      module->getMemory(asyncifyMemory)->is64() ? Type::i64 : Type::i32;

    removeList = handleBracketingOperators(removeList);
    addList = handleBracketingOperators(addList);
    onlyList = handleBracketingOperators(onlyList);

    if (!onlyList.empty() && (!removeList.empty() || !addList.empty())) {
      Fatal() << "It makes no sense to use both an asyncify only-list together "
                 "with another list.";
    }

    // An import can change the state if all imports can, or if it matches a
    // (wildcard-capable) entry in the listed imports.
    auto canImportChangeState = [&](Name module, Name base) {
      if (allImportsCanChangeState) {
        return true;
      }
      auto full = getFullImportName(module, base);
      for (auto& listedImport : listedImports) {
        if (String::wildcardMatch(listedImport, full)) {
          return true;
        }
      }
      return false;
    };

    // Scan the module.
    ModuleAnalyzer analyzer(*module,
                            canImportChangeState,
                            canIndirectChangeState,
                            removeList,
                            addList,
                            propagateAddList,
                            onlyList,
                            verbose);

    // Add necessary globals before we emit code to use them.
    addGlobals(module, relocatable);

    // Compute the set of functions we will instrument. All of the passes we run
    // below only need to run there.
    PassUtils::FuncSet instrumentedFuncs;
    for (auto& func : module->functions) {
      if (analyzer.needsInstrumentation(func.get())) {
        instrumentedFuncs.insert(func.get());
      }
    }

    // Instrument the flow of code, adding code instrumentation and
    // skips for when rewinding. We do this on flat IR so that it is
    // practical to add code around each call, without affecting
    // anything else.
    {
      PassUtils::FilteredPassRunner runner(module, instrumentedFuncs);
      runner.add("flatten");
      // Dce is useful here, since AsyncifyFlow makes control flow conditional,
      // which may make unreachable code look reachable. It also lets us ignore
      // unreachable code here.
      runner.add("dce");
      if (optimize) {
        // Optimizing before AsyncifyFlow is crucial, especially coalescing,
        // because the flow changes add many branches, break up if-elses, etc.,
        // all of which extend the live ranges of locals. In other words, it is
        // not possible to coalesce well afterwards.
        runner.add("remove-unused-names");
        runner.add("simplify-locals-nonesting");
        runner.add("reorder-locals");
        runner.add("coalesce-locals");
        runner.add("simplify-locals-nonesting");
        runner.add("reorder-locals");
        runner.add("merge-blocks");
      }
      runner.add(
        std::make_unique<AsyncifyFlow>(&analyzer, pointerType, asyncifyMemory));
      runner.setIsNested(true);
      runner.setValidateGlobally(false);
      runner.run();
    }
    if (asserts) {
      // Add asserts in non-instrumented code. Note we do not use an
      // instrumented pass runner here as we do want to run on all functions.
      PassRunner runner(module);
      runner.add(std::make_unique<AsyncifyAssertInNonInstrumented>(
        &analyzer, pointerType, asyncifyMemory));
      runner.setIsNested(true);
      runner.setValidateGlobally(false);
      runner.run();
    }
    // Next, add local saving/restoring logic. We optimize before doing this,
    // to undo the extra code generated by flattening, and to arrive at the
    // minimal amount of locals (which is important as we must save and
    // restore those locals). We also and optimize after as well to simplify
    // the code as much as possible.
    {
      PassUtils::FilteredPassRunner runner(module, instrumentedFuncs);
      if (optimize) {
        runner.addDefaultFunctionOptimizationPasses();
      }
      runner.add(std::make_unique<AsyncifyLocals>(
        &analyzer, pointerType, asyncifyMemory));
      if (optimize) {
        runner.addDefaultFunctionOptimizationPasses();
      }
      runner.setIsNested(true);
      runner.setValidateGlobally(false);
      runner.run();
    }
    // Finally, add function support (that should not have been seen by
    // the previous passes).
    addFunctions(module);
  }

private:
  // Adds the asyncify state and data-pointer globals; when |imported| is set
  // (asyncify-relocatable) they are imported from "env" instead of defined.
  void addGlobals(Module* module, bool imported) {
    Builder builder(*module);

    auto asyncifyState = builder.makeGlobal(ASYNCIFY_STATE,
                                            Type::i32,
                                            builder.makeConst(int32_t(0)),
                                            Builder::Mutable);
    if (imported) {
      asyncifyState->module = ENV;
      asyncifyState->base = ASYNCIFY_STATE;
    }
    module->addGlobal(std::move(asyncifyState));

    auto asyncifyData = builder.makeGlobal(ASYNCIFY_DATA,
                                           pointerType,
                                           builder.makeConst(pointerType),
                                           Builder::Mutable);
    if (imported) {
      asyncifyData->module = ENV;
      asyncifyData->base = ASYNCIFY_DATA;
    }
    module->addGlobal(std::move(asyncifyData));
  }

  // Adds and exports the runtime API: start/stop unwind/rewind plus
  // asyncify_get_state.
  void addFunctions(Module* module) {
    Builder builder(*module);

    // Each API function sets the state global, optionally sets the data
    // pointer, and traps if the stack position has overrun the stack end.
    auto makeFunction = [&](Name name, bool setData, State state) {
      std::vector<Type> params;
      if (setData) {
        params.push_back(pointerType);
      }
      auto* body = builder.makeBlock();
      body->list.push_back(builder.makeGlobalSet(
        ASYNCIFY_STATE, builder.makeConst(int32_t(state))));
      if (setData) {
        body->list.push_back(builder.makeGlobalSet(
          ASYNCIFY_DATA, builder.makeLocalGet(0, pointerType)));
      }
      // Verify the data is valid.
      auto* stackPos =
        builder.makeLoad(pointerType.getByteSize(),
                         false,
                         int(DataOffset::BStackPos),
                         pointerType.getByteSize(),
                         builder.makeGlobalGet(ASYNCIFY_DATA, pointerType),
                         pointerType,
                         asyncifyMemory);
      auto* stackEnd =
        builder.makeLoad(pointerType.getByteSize(),
                         false,
                         int(pointerType == Type::i64 ? DataOffset::BStackEnd64
                                                      : DataOffset::BStackEnd),
                         pointerType.getByteSize(),
                         builder.makeGlobalGet(ASYNCIFY_DATA, pointerType),
                         pointerType,
                         asyncifyMemory);
      body->list.push_back(builder.makeIf(
        builder.makeBinary(
          Abstract::getBinary(pointerType, Abstract::GtU), stackPos, stackEnd),
        builder.makeUnreachable()));
      body->finalize();
      auto func = builder.makeFunction(
        name, Signature(Type(params), Type::none), {}, body);
      module->addFunction(std::move(func));
      module->addExport(builder.makeExport(name, name, ExternalKind::Function));
    };

    makeFunction(ASYNCIFY_START_UNWIND, true, State::Unwinding);
    makeFunction(ASYNCIFY_STOP_UNWIND, false, State::Normal);
    makeFunction(ASYNCIFY_START_REWIND, true, State::Rewinding);
    makeFunction(ASYNCIFY_STOP_REWIND, false, State::Normal);

    module->addFunction(
      builder.makeFunction(ASYNCIFY_GET_STATE,
                           Signature(Type::none, Type::i32),
                           {},
                           builder.makeGlobalGet(ASYNCIFY_STATE, Type::i32)));
    module->addExport(builder.makeExport(
      ASYNCIFY_GET_STATE, ASYNCIFY_GET_STATE, ExternalKind::Function));
  }

  // Creates a dedicated memory for the asyncify stack (the
  // asyncify-in-secondary-memory mode), sized in pages.
  Name createSecondaryMemory(Module* module, Address secondaryMemorySize) {
    Name name = Names::getValidMemoryName(*module, "asyncify_memory");
    auto secondaryMemory =
      Builder::makeMemory(name, secondaryMemorySize, secondaryMemorySize);
    module->addMemory(std::move(secondaryMemory));
    return name;
  }

  Type pointerType;
  Name asyncifyMemory;
};
Pass* createAsyncifyPass() { return new Asyncify(); }
// Helper passes that can be run after Asyncify.
// Post-Asyncify optimizer: simplifies the instrumented code under stronger
// assumptions, encoded in the template parameters:
//  - neverRewind: the state is never State::Rewinding.
//  - neverUnwind: the state is never State::Unwinding.
//  - importsAlwaysUnwind: a call to any import definitely starts an unwind.
// Uses a LinearExecutionWalker so that "we are currently unwinding" knowledge
// can be propagated forward through a linear execution trace.
template<bool neverRewind, bool neverUnwind, bool importsAlwaysUnwind>
struct ModAsyncify
  : public WalkerPass<LinearExecutionWalker<
      ModAsyncify<neverRewind, neverUnwind, importsAlwaysUnwind>>> {
  bool isFunctionParallel() override { return true; }

  std::unique_ptr<Pass> create() override {
    return std::make_unique<
      ModAsyncify<neverRewind, neverUnwind, importsAlwaysUnwind>>();
  }

  void doWalkFunction(Function* func) {
    // Find the asyncify state name. The exported stop-unwind function
    // contains exactly one global.set, which writes the state global, so we
    // can recover the global's (possibly minified) name from its body.
    auto* unwind = this->getModule()->getExport(ASYNCIFY_STOP_UNWIND);
    auto* unwindFunc = this->getModule()->getFunction(unwind->value);
    FindAll<GlobalSet> sets(unwindFunc->body);
    assert(sets.list.size() == 1);
    asyncifyStateName = sets.list[0]->name;
    // Walk and optimize.
    this->walk(func->body);
  }

  // Note that we don't just implement GlobalGet as we may know the value is
  // *not* 0, 1, or 2, but not know the actual value. So what we can say depends
  // on the comparison being done on it, and so we implement Binary and
  // Select.

  void visitBinary(Binary* curr) {
    // Check if this is a comparison of the asyncify state to a specific
    // constant, which we may know is impossible.
    bool flip = false;
    if (curr->op == NeInt32) {
      // x != C is handled as !(x == C); remember to flip the result.
      flip = true;
    } else if (curr->op != EqInt32) {
      return;
    }
    auto* c = curr->right->dynCast<Const>();
    if (!c) {
      return;
    }
    auto* get = curr->left->dynCast<GlobalGet>();
    if (!get || get->name != asyncifyStateName) {
      return;
    }
    // This is a comparison of the state to a constant, check if we know the
    // value.
    int32_t value;
    auto checkedValue = c->value.geti32();
    if ((checkedValue == int(State::Unwinding) && neverUnwind) ||
        (checkedValue == int(State::Rewinding) && neverRewind)) {
      // We know the state is checked against an impossible value.
      value = 0;
    } else if (checkedValue == int(State::Unwinding) && this->unwinding) {
      // We know we are in fact unwinding right now.
      value = 1;
      // This knowledge has now been used; stop tracking it.
      unsetUnwinding();
    } else {
      return;
    }
    if (flip) {
      value = 1 - value;
    }
    Builder builder(*this->getModule());
    this->replaceCurrent(builder.makeConst(int32_t(value)));
  }

  void visitSelect(Select* curr) {
    auto* get = curr->condition->dynCast<GlobalGet>();
    if (!get || get->name != asyncifyStateName) {
      return;
    }
    // This is a comparison of the normal state, which means we are checking
    // "if running normally, run this code, but if rewinding, ignore it". If
    // we know we'll never rewind, we can optimize this.
    if (neverRewind) {
      Builder builder(*this->getModule());
      curr->condition = builder.makeConst(int32_t(0));
    }
  }

  void visitUnary(Unary* curr) {
    if (curr->op != EqZInt32) {
      return;
    }
    auto* get = curr->value->dynCast<GlobalGet>();
    if (!get || get->name != asyncifyStateName) {
      return;
    }
    // This is a comparison of the state to zero, which means we are checking
    // "if running normally, run this code, but if rewinding, ignore it". If
    // we know we'll never rewind, we can optimize this.
    if (neverRewind) {
      Builder builder(*this->getModule());
      // The whole expression will be 1 because it is (i32.eqz (i32.const 0))
      this->replaceCurrent(builder.makeConst(int32_t(1)));
    }
  }

  void visitCall(Call* curr) {
    // A call invalidates any unwinding knowledge we were tracking (it may be
    // re-established just below for imports).
    unsetUnwinding();
    if (!importsAlwaysUnwind) {
      return;
    }
    auto* target = this->getModule()->getFunction(curr->target);
    if (!target->imported()) {
      return;
    }
    // This is an import that definitely unwinds. Await the next check of
    // the state in this linear execution trace, which we can turn into a
    // constant.
    this->unwinding = true;
  }

  void visitCallIndirect(CallIndirect* curr) { unsetUnwinding(); }

  static void doNoteNonLinear(
    ModAsyncify<neverRewind, neverUnwind, importsAlwaysUnwind>* self,
    Expression**) {
    // When control flow branches, stop tracking an unwinding.
    self->unsetUnwinding();
  }

  void visitGlobalSet(GlobalSet* set) {
    // TODO: this could be more precise
    unsetUnwinding();
  }

private:
  // Name of the global holding the asyncify state (recovered in
  // doWalkFunction).
  Name asyncifyStateName;

  // Whether we just did a call to an import that indicates we are unwinding.
  bool unwinding = false;

  void unsetUnwinding() { this->unwinding = false; }
};
//
// Assume imports that may unwind will always unwind, and that rewinding never
// happens.
//
// Factory for the "always only unwind" variant of ModAsyncify.
Pass* createModAsyncifyAlwaysOnlyUnwindPass() {
  // Template args: neverRewind = true, neverUnwind = false,
  // importsAlwaysUnwind = true.
  using AlwaysOnlyUnwind = ModAsyncify<true, false, true>;
  return new AlwaysOnlyUnwind();
}
//
// Assume that we never unwind, but may still rewind.
//
// NOTE(review): this struct appears to be dead code — nothing visible in this
// file references it (createModAsyncifyNeverUnwindPass below returns a
// ModAsyncify instantiation instead), and its run() is a no-op. Confirm it is
// unused elsewhere and consider removing it; as a non-static file-scope type
// it also risks ODR clashes.
struct ModAsyncifyNeverUnwind : public Pass {
  void run(Module* module) override {}
};
// Factory for the "never unwind" variant of ModAsyncify.
Pass* createModAsyncifyNeverUnwindPass() {
  // Template args: neverRewind = false, neverUnwind = true,
  // importsAlwaysUnwind = false.
  using NeverUnwind = ModAsyncify<false, true, false>;
  return new NeverUnwind();
}
} // namespace wasm