// blob: 7438f8e695f5ab8ad920896f1efa9fd1b6614c37 [file] [log] [blame] [edit]
/*
* Copyright 2020 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// The process of module splitting involves these steps:
//
// 1. Create the new secondary modules.
//
// 2. Move the deferred functions from the primary to each of the secondary
// modules.
//
// 3. For any secondary function exported from the primary module, export in
// its place a trampoline function that makes an indirect call to its
// placeholder function (and eventually to the original secondary
// function), allocating a new table slot for the placeholder if necessary.
//
// 4. Replace all references to each secondary module's functions in the
// primary module's and each other secondary module's table segments with
// references to imported placeholder functions.
//
// 5. Rewrite direct calls from primary functions to secondary functions to be
// indirect calls to their placeholder functions (and eventually to their
// original secondary functions), allocating new table slots for the
// placeholders if necessary.
//
// 6. For each primary function directly called from a secondary function,
// export the primary function if it is not already exported and import it
// into each secondary module using it.
//
// 7. For each secondary module, create new active table segments in the
// module that will replace all the placeholder function references in the
// table with references to their corresponding secondary functions upon
// instantiation.
//
// 8. Export globals, tags, tables, and memories from the primary module and
// import them in the secondary modules.
//
// 9. Run RemoveUnusedModuleElements pass on the secondary modules in order to
// remove unused imports.
//
// Functions can be used or referenced three ways in a WebAssembly module: they
// can be exported, called, or referenced with ref.func. The above procedure
// introduces a layer of indirection to each of those mechanisms that removes
// all references to secondary functions from the primary module but restores
// the original program's semantics once the secondary modules are instantiated.
//
// The code as currently written makes a few assumptions about the module
// that is being split:
//
// 1. It assumes that mutable-globals is allowed. This could be worked around
// by introducing wrapper functions for globals and rewriting secondary
// code that accesses them, but now that mutable-globals is shipped on all
// browsers, hopefully that extra complexity won't be necessary.
//
// 2. It assumes that either all table segment offsets are constants or there
// is exactly one segment that may have a non-constant offset. It also
// assumes that all segments are active segments.
//
// 3. It assumes that if exact function references are required for validity
// (because they are stored in a local with an exact function type, for
// example), then custom descriptors are allowed so primary functions can
// be imported exactly. This could be worked around by removing exactness
// from the IR before splitting.
//
#include "ir/module-splitting.h"
#include "asmjs/shared-constants.h"
#include "ir/export-utils.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/insert_ordered.h"
#include "wasm-builder.h"
#include "wasm.h"
namespace wasm::ModuleSplitting {
namespace {
// Name of the mutable i32 global (initialized to 0) that the primary module
// defines and exports in JSPI mode. Rewritten calls test it for zero to decide
// whether the secondary-module loader has to be called first.
static const Name LOAD_SECONDARY_STATUS = "load_secondary_module_status";
// Visits every element of every active element segment in `module`, calling
// `f(table, base, index, elem)` for each one.
//
// When the segment offset is a constant, `base` is empty and `index` is the
// absolute table index. When the offset is a `global.get`, `base` names that
// global and `index` is relative to it. Any other offset expression is treated
// like a constant offset of zero. `elem` is passed as an lvalue so that the
// callback may replace the element in place.
template<class F> void forEachElement(Module& module, F f) {
  ModuleUtils::iterActiveElementSegments(module, [&](ElementSegment* segment) {
    Name baseGlobal = "";
    Index firstIndex = 0;
    auto* offsetExpr = segment->offset;
    if (auto* constant = offsetExpr->dynCast<Const>()) {
      firstIndex = constant->value.getInteger();
    } else if (auto* get = offsetExpr->dynCast<GlobalGet>()) {
      baseGlobal = get->name;
    }
    Index position = 0;
    while (position < segment->data.size()) {
      f(segment->table,
        baseGlobal,
        firstIndex + position,
        segment->data[position]);
      ++position;
    }
  });
}
// Hands out table slots for functions and remembers which slot each function
// occupies, so that a function is never given two slots.
struct TableSlotManager {
  // A position in a table, expressed either as a constant index or as an
  // index relative to the value of a global.
  struct Slot {
    Name tableName;
    // If `global` is empty, then this slot is at a statically known index.
    Name global;
    Index index = 0;
    // Generate code to compute the index of this table slot
    Expression* makeExpr(Module& module);
  };
  // The module whose tables and element segments are managed.
  Module& module;
  // The table that new slots are allocated in; null until one is found or
  // lazily created.
  Table* activeTable = nullptr;
  // The segment that new entries are appended to; null until one is found or
  // lazily created.
  ElementSegment* activeSegment = nullptr;
  // The slot corresponding to the first entry of `activeSegment`.
  Slot activeBase;
  // The slot already assigned to each function.
  std::map<Name, Slot> funcIndices;
  // The segments that initialize `activeTable`.
  std::vector<ElementSegment*> activeTableSegments;
  // `explicit` so that a Module is never silently converted into a manager.
  explicit TableSlotManager(Module& module);
  Table* makeTable();
  ElementSegment* makeElementSegment();
  // Returns the table index for `func`, allocating a new index if necessary.
  Slot getSlot(Name func, HeapType type);
  // Records `slot` for `func` unless `func` already has a slot.
  void addSlot(Name func, Slot slot);
};
// Builds, in `module`, an expression that evaluates to this slot's table
// index, using the address type of the slot's table. A slot without a base
// global yields a constant; otherwise the result is `global.get` of the base,
// plus the constant index when that index is non-zero.
Expression* TableSlotManager::Slot::makeExpr(Module& module) {
  Builder builder(module);
  auto* table = module.getTable(tableName);
  auto addressType = table->addressType;
  auto constIndex = [&]() -> Expression* {
    return builder.makeConst(Literal::makeFromInt32(index, addressType));
  };
  if (!global.size()) {
    return constIndex();
  }
  Expression* base = builder.makeGlobalGet(global, addressType);
  if (index == 0) {
    // No addition is needed when the slot sits exactly at the base.
    return base;
  }
  return builder.makeBinary(
    table->is64() ? AddInt64 : AddInt32, base, constIndex());
}
// Records `slot` as the location of `func`. A function that already has a
// slot keeps its existing one.
void TableSlotManager::addSlot(Name func, Slot slot) {
  funcIndices.try_emplace(func, slot);
}
// Chooses the table and element segment that new slots will be appended to
// (or leaves them null so they are created lazily) and records the slots of
// the functions that are already present in active element segments.
TableSlotManager::TableSlotManager(Module& module) : module(module) {
  // If possible, just create a new table to manage all primary-to-secondary
  // calls lazily. Do not re-use slots for functions that will already be in
  // existing tables, since that is not correct in the face of table mutations.
  // However, do not do this for emscripten; its loader code (and dynamic
  // loading in particular) do not support this yet.
  // TODO: Reduce overhead by creating a separate table for each function type
  // if WasmGC is enabled.
  const bool exportsEmscriptenTable =
    module.getExportOrNull("__indirect_function_table") != nullptr;
  Table* onlyTable = nullptr;
  if (module.tables.size() == 1) {
    onlyTable = module.tables[0].get();
  }
  const bool importsEmscriptenTable =
    onlyTable && onlyTable->imported() && onlyTable->module == "env" &&
    onlyTable->base == "__indirect_function_table";
  const bool usesEmscriptenTable =
    exportsEmscriptenTable || importsEmscriptenTable;
  if (module.features.hasReferenceTypes() && !usesEmscriptenTable) {
    return;
  }

  // TODO: Reject or handle passive element segments
  const auto funcref = Type(HeapType::func, Nullable);
  for (auto& table : module.tables) {
    if (table->type == funcref) {
      activeTable = table.get();
      break;
    }
  }
  if (!activeTable) {
    // There is no indirect function table, so we will create one lazily.
    return;
  }

  ModuleUtils::iterTableSegments(
    module, activeTable->name, [&](ElementSegment* segment) {
      activeTableSegments.push_back(segment);
    });

  if (activeTableSegments.empty()) {
    // There are no active segments, so we will lazily create one and start
    // filling it at index 0.
    activeBase = {activeTable->name, "", 0};
  } else if (activeTableSegments.size() == 1 &&
             activeTableSegments[0]->type == funcref &&
             !activeTableSegments[0]->offset->is<Const>()) {
    // With exactly one segment whose offset is not a constant, new items are
    // appended to the end of that segment. In every other case new items go at
    // constant offsets after all existing items at constant offsets.
    ElementSegment* only = activeTableSegments[0];
    assert(only->offset->is<GlobalGet>() &&
           "Unexpected initializer instruction");
    activeSegment = only;
    activeBase = {
      activeTable->name, only->offset->cast<GlobalGet>()->name, 0};
  } else {
    // Pick the segment that ends at the highest occupied table slot so that
    // new items can be appended to it contiguously without overwriting any
    // other items. TODO: be more clever about filling gaps in the table, if
    // that is ever useful.
    Index maxIndex = 0;
    for (auto* segment : activeTableSegments) {
      assert(segment->offset->is<Const>() &&
             "Unexpected non-const segment offset with multiple segments");
      Index segmentBase = segment->offset->cast<Const>()->value.getInteger();
      const auto segmentEnd = segmentBase + segment->data.size();
      if (segmentEnd >= maxIndex) {
        maxIndex = segmentEnd;
        activeSegment = segment;
        activeBase = {activeTable->name, "", segmentBase};
      }
    }
  }

  // Initialize funcIndices with the functions already in the table.
  forEachElement(module,
                 [&](Name table, Name base, Index offset, Expression* elem) {
                   auto* ref = elem->dynCast<RefFunc>();
                   if (ref) {
                     addSlot(ref->func, {table, base, offset});
                   }
                 });
}
// Adds a new table to the module, named after index 0 (adjusted by
// getValidTableName as needed), and returns it.
Table* TableSlotManager::makeTable() {
  Name tableName = Names::getValidTableName(module, Name::fromInt(0));
  return module.addTable(Builder::makeTable(tableName));
}
// Adds a new element segment for the active table, starting at constant
// offset 0 in the table's address type, and returns it. `activeTable` must be
// set before this is called.
ElementSegment* TableSlotManager::makeElementSegment() {
  Expression* zeroOffset = Builder(module).makeConst(
    Literal::makeFromInt32(0, activeTable->addressType));
  Name segmentName =
    Names::getValidElementSegmentName(module, Name::fromInt(0));
  auto segment =
    Builder::makeElementSegment(segmentName, activeTable->name, zeroOffset);
  return module.addElementSegment(std::move(segment));
}
// Returns the slot holding `func`. If the function has no slot yet, a
// `ref.func` to it (non-nullable, inexact reference to `type`) is appended to
// the active segment, creating the table and segment first if they do not
// exist, and the table limits are grown to cover the new slot.
TableSlotManager::Slot TableSlotManager::getSlot(Name func, HeapType type) {
  if (auto existing = funcIndices.find(func); existing != funcIndices.end()) {
    return existing->second;
  }

  // If there are no segments yet, allocate one.
  if (!activeSegment) {
    if (!activeTable) {
      activeTable = makeTable();
      activeBase = {activeTable->name, "", 0};
    }
    // None of the existing segments should refer to the active table
    assert(std::none_of(module.elementSegments.begin(),
                        module.elementSegments.end(),
                        [&](std::unique_ptr<ElementSegment>& segment) {
                          return segment->table == activeTable->name;
                        }));
    activeSegment = makeElementSegment();
  }

  // The new entry goes right after the current end of the active segment.
  Slot newSlot = {activeBase.tableName,
                  activeBase.global,
                  activeBase.index + Index(activeSegment->data.size())};
  auto* ref =
    Builder(module).makeRefFunc(func, Type(type, NonNullable, Inexact));
  activeSegment->data.push_back(ref);
  addSlot(func, newSlot);

  if (activeTable->initial <= newSlot.index) {
    activeTable->initial = newSlot.index + 1;
    // TODO: handle the active table not being the dylink table (#3823)
    if (module.dylinkSection) {
      module.dylinkSection->tableSize = activeTable->initial;
    }
  }
  if (activeTable->max <= newSlot.index) {
    activeTable->max = newSlot.index + 1;
  }
  return newSlot;
}
// Performs the whole split from its constructor: after construction,
// `secondaries` holds the new modules and `placeholderMap` describes the
// placeholders left in the primary module's tables.
struct ModuleSplitter {
  const Config& config;
  // The secondary modules, in the same order as `config.secondaryFuncs`.
  std::vector<std::unique_ptr<Module>> secondaries;
  Module& primary;
  // Names of the functions that stay in (or are added to) the primary module.
  std::unordered_set<Name> primaryFuncs;
  // Names of all functions moved to any secondary module.
  std::unordered_set<Name> allSecondaryFuncs;
  // Index into `secondaries` of the module each secondary function moved to.
  std::unordered_map<Name, Index> funcToSecondaryIndex;
  TableSlotManager tableManager;
  Names::MinifiedNameGenerator minified;
  // Map from internal function names to (one of) their corresponding export
  // names.
  std::unordered_map<Name, Name> exportedPrimaryFuncs;
  // For each table, map placeholder indices to the names of the functions they
  // replace.
  std::unordered_map<Name, std::map<size_t, Name>> placeholderMap;
  // Internal name of the LOAD_SECONDARY_MODULE function.
  Name internalLoadSecondaryModule;
  // Map from original secondary function name to its trampoline
  std::unordered_map<Name, Name> trampolineMap;
  // Initialization helpers
  static std::unique_ptr<Module> initSecondary(const Module& primary);
  static std::unordered_map<Name, Name>
  initExportedPrimaryFuncs(const Module& primary);
  // Other helpers
  void exportImportFunction(Name func, const std::set<Module*>& modules);
  Expression* maybeLoadSecondary(Builder& builder, Expression* callIndirect);
  Name getTrampoline(Name funcName);
  // Main splitting steps
  void classifyFunctions();
  void setupJSPI();
  void moveSecondaryFunctions();
  void thunkExportedSecondaryFunctions();
  void indirectReferencesToSecondaryFunctions();
  void indirectCallsToSecondaryFunctions();
  void exportImportCalledPrimaryFunctions();
  void setupTablePatching();
  void shareImportableItems();
  void removeUnusedSecondaryElements();
  // Runs every splitting step, in order, on `primary`. Members are
  // initialized in declaration order, so `tableManager` and
  // `exportedPrimaryFuncs` both observe the module before any step has
  // modified it.
  ModuleSplitter(Module& primary, const Config& config)
    : config(config), primary(primary), tableManager(primary),
      exportedPrimaryFuncs(initExportedPrimaryFuncs(primary)) {
    classifyFunctions();
    // The JSPI setup is only needed when JSPI support was requested.
    if (config.jspi) {
      setupJSPI();
    }
    moveSecondaryFunctions();
    thunkExportedSecondaryFunctions();
    indirectReferencesToSecondaryFunctions();
    indirectCallsToSecondaryFunctions();
    exportImportCalledPrimaryFunctions();
    setupTablePatching();
    shareImportableItems();
    removeUnusedSecondaryElements();
  }
};
// Prepares the primary module for JSPI-based lazy loading: resolves the
// function used to load the secondary module and adds an exported mutable
// global that records whether the load has happened.
void ModuleSplitter::setupJSPI() {
  // Support the first version of JSPI, where the JSPI pass added the load
  // secondary module export.
  // TODO: remove this once only the new JSPI API is supported.
  auto* legacyExport = primary.getExportOrNull(LOAD_SECONDARY_MODULE);
  const bool hasLegacyLoader =
    legacyExport && legacyExport->kind == ExternalKind::Function;
  if (hasLegacyLoader) {
    internalLoadSecondaryModule = *legacyExport->getInternalName();
    // The exported LOAD_SECONDARY_MODULE function is only needed internally,
    // so drop the export.
    primary.removeExport(LOAD_SECONDARY_MODULE);
  } else {
    // Add an imported function to load the secondary module.
    auto loader = Builder::makeFunction(
      ModuleSplitting::LOAD_SECONDARY_MODULE,
      Type(Signature(Type::none, Type::none), NonNullable, Inexact),
      {});
    loader->module = ENV;
    loader->base = ModuleSplitting::LOAD_SECONDARY_MODULE;
    primary.addFunction(std::move(loader));
    internalLoadSecondaryModule = ModuleSplitting::LOAD_SECONDARY_MODULE;
  }

  // Add a global to track whether the secondary module has been loaded yet,
  // and export it under the same name.
  Builder builder(primary);
  auto* zero = builder.makeConst(int32_t(0));
  auto statusGlobal = builder.makeGlobal(
    LOAD_SECONDARY_STATUS, Type::i32, zero, Builder::Mutable);
  primary.addGlobal(std::move(statusGlobal));
  auto statusExport = builder.makeExport(
    LOAD_SECONDARY_STATUS, LOAD_SECONDARY_STATUS, ExternalKind::Global);
  primary.addExport(std::move(statusExport));
}
std::unique_ptr<Module> ModuleSplitter::initSecondary(const Module& primary) {
// Create the secondary module and copy trivial properties.
auto secondary = std::make_unique<Module>();
secondary->features = primary.features;
secondary->hasFeaturesSection = primary.hasFeaturesSection;
return secondary;
}
// Sorts every function of the primary module into `primaryFuncs` or
// `allSecondaryFuncs`. A function becomes secondary only if the config lists
// it and nothing forces it to stay in the primary module.
void ModuleSplitter::classifyFunctions() {
  // Find functions that refer to data or element segments. These functions must
  // remain in the primary module because segments cannot be exported to be
  // accessed from the secondary module.
  //
  // TODO: Investigate other options, such as moving the segments to the
  // secondary module or replacing the segment-using instructions in the
  // secondary module with calls to imports.
  ModuleUtils::ParallelFunctionAnalysis<std::vector<Name>>
    segmentReferrerCollector(
      primary, [&](Function* func, std::vector<Name>& segmentReferrers) {
        if (func->imported()) {
          return;
        }
        // Flags any expression that has a name field referring to a data or
        // element segment.
        struct SegmentReferrerCollector
          : PostWalker<SegmentReferrerCollector,
                       UnifiedExpressionVisitor<SegmentReferrerCollector>> {
          bool hasSegmentReference = false;
          void visitExpression(Expression* curr) {
#define DELEGATE_ID curr->_id
#define DELEGATE_START(id) [[maybe_unused]] auto* cast = curr->cast<id>();
#define DELEGATE_GET_FIELD(id, field) cast->field
#define DELEGATE_FIELD_TYPE(id, field)
#define DELEGATE_FIELD_HEAPTYPE(id, field)
#define DELEGATE_FIELD_CHILD(id, field)
#define DELEGATE_FIELD_OPTIONAL_CHILD(id, field)
#define DELEGATE_FIELD_INT(id, field)
#define DELEGATE_FIELD_LITERAL(id, field)
#define DELEGATE_FIELD_NAME(id, field)
#define DELEGATE_FIELD_SCOPE_NAME_DEF(id, field)
#define DELEGATE_FIELD_SCOPE_NAME_USE(id, field)
#define DELEGATE_FIELD_ADDRESS(id, field)
#define DELEGATE_FIELD_NAME_KIND(id, field, kind)                              \
  if (kind == ModuleItemKind::DataSegment ||                                   \
      kind == ModuleItemKind::ElementSegment) {                                \
    hasSegmentReference = true;                                                \
  }
#include "wasm-delegations-fields.def"
          }
        };
        SegmentReferrerCollector collector;
        collector.walkFunction(func);
        if (collector.hasSegmentReference) {
          segmentReferrers.push_back(func->name);
        }
      });

  // Flatten the per-function results into one set.
  std::unordered_set<Name> segmentReferrers;
  for (auto& [_, names] : segmentReferrerCollector.map) {
    for (auto& name : names) {
      segmentReferrers.insert(name);
    }
  }

  // Flatten the per-module function lists from the config into one set.
  std::unordered_set<Name> configSecondaryFuncs;
  for (auto& group : config.secondaryFuncs) {
    for (auto& name : group) {
      configSecondaryFuncs.insert(name);
    }
  }

  for (auto& func : primary.functions) {
    // In JSPI mode exported functions cannot be moved to the secondary
    // module since that would make them async when they may not have the JSPI
    // wrapper. Exported JSPI functions can still benefit from splitting though
    // since only the JSPI wrapper stub will remain in the primary module.
    const bool movable =
      !func->imported() && configSecondaryFuncs.count(func->name) &&
      !(config.jspi && ExportUtils::isExported(primary, *func)) &&
      !segmentReferrers.count(func->name);
    if (!movable) {
      primaryFuncs.insert(func->name);
      continue;
    }
    assert(func->name != primary.start && "The start function must be kept");
    allSecondaryFuncs.insert(func->name);
  }
}
std::unordered_map<Name, Name>
ModuleSplitter::initExportedPrimaryFuncs(const Module& primary) {
std::unordered_map<Name, Name> functionExportNames;
for (auto& ex : primary.exports) {
if (ex->kind == ExternalKind::Function) {
functionExportNames[*ex->getInternalName()] = ex->name;
}
}
return functionExportNames;
}
// Makes the primary function `funcName` callable from each module in
// `modules`: ensures the primary module exports it, then adds a matching
// function import to every listed module that does not already contain a
// function of that name.
void ModuleSplitter::exportImportFunction(Name funcName,
                                          const std::set<Module*>& modules) {
  Name exportName;
  // Reuse the existing export name if there is one; otherwise create a new
  // export.
  if (auto existing = exportedPrimaryFuncs.find(funcName);
      existing != exportedPrimaryFuncs.end()) {
    exportName = existing->second;
  } else {
    if (!config.minimizeNewExportNames) {
      exportName = Names::getValidExportName(
        primary, config.newExportPrefix + funcName.toString());
    } else {
      // Keep generating minified names until one is not yet an export.
      do {
        exportName = config.newExportPrefix + minified.getName();
      } while (primary.getExportOrNull(exportName));
    }
    primary.addExport(
      Builder::makeExport(exportName, funcName, ExternalKind::Function));
    exportedPrimaryFuncs.emplace(funcName, exportName);
  }

  // Import the function wherever it is not already present.
  for (auto* secondary : modules) {
    if (secondary->getFunctionOrNull(funcName)) {
      continue;
    }
    auto* primaryFunc = primary.getFunction(funcName);
    auto imported = Builder::makeFunction(funcName, primaryFunc->type, {});
    imported->hasExplicitName = primaryFunc->hasExplicitName;
    imported->module = config.importNamespace;
    imported->base = exportName;
    imported->type =
      imported->type.withInexactIfNoCustomDescs(secondary->features);
    secondary->addFunction(std::move(imported));
  }
}
void ModuleSplitter::moveSecondaryFunctions() {
// Move the specified functions from the primary to the secondary modules.
for (auto& funcNames : config.secondaryFuncs) {
auto secondary = initSecondary(primary);
for (auto funcName : funcNames) {
if (allSecondaryFuncs.count(funcName)) {
auto* func = primary.getFunction(funcName);
ModuleUtils::copyFunction(func, *secondary);
primary.removeFunction(funcName);
funcToSecondaryIndex[funcName] = secondaries.size();
}
}
secondaries.push_back(std::move(secondary));
}
}
// Returns the name of the primary-module trampoline for the secondary function
// `funcName`, creating it on first use. The trampoline has the same type as
// the secondary function and forwards all of its parameters to it in a direct
// call.
Name ModuleSplitter::getTrampoline(Name funcName) {
  auto [entry, isNew] = trampolineMap.try_emplace(funcName);
  if (!isNew) {
    return entry->second;
  }

  Builder builder(primary);
  Module& owner = *secondaries.at(funcToSecondaryIndex.at(funcName));
  auto* target = owner.getFunction(funcName);
  const Name trampoline = Names::getValidFunctionName(
    primary, std::string("trampoline_") + funcName.toString());
  entry->second = trampoline;

  // Forward every parameter to the target.
  std::vector<Expression*> forwarded;
  const auto numParams = target->getNumParams();
  forwarded.reserve(numParams);
  for (Index param = 0; param < numParams; ++param) {
    forwarded.push_back(
      builder.makeLocalGet(param, target->getLocalType(param)));
  }
  auto* call = builder.makeCall(funcName, forwarded, target->getResults());

  auto thunk = builder.makeFunction(trampoline, target->type, {}, call);
  thunk->hasExplicitName = target->hasExplicitName;
  primary.addFunction(std::move(thunk));
  primaryFuncs.insert(trampoline);
  return trampoline;
}
void ModuleSplitter::thunkExportedSecondaryFunctions() {
// Update exports of secondary functions in the primary module to export
// wrapper functions that indirectly call the secondary functions. We are
// adding secondary function names to the primary table here, but they will be
// replaced with placeholder functions later along with any references to
// secondary functions that were already in the table.
Builder builder(primary);
for (auto& ex : primary.exports) {
if (ex->kind != ExternalKind::Function ||
!allSecondaryFuncs.count(*ex->getInternalName())) {
continue;
}
Name trampoline = getTrampoline(*ex->getInternalName());
ex->setInternalName(trampoline);
}
}
// In JSPI mode, wraps `callIndirect` in a sequence that first calls the
// secondary-module loader if the load-status global is still zero. Without
// JSPI, `callIndirect` is returned unchanged.
Expression* ModuleSplitter::maybeLoadSecondary(Builder& builder,
                                               Expression* callIndirect) {
  if (config.jspi) {
    auto* status = builder.makeGlobalGet(LOAD_SECONDARY_STATUS, Type::i32);
    auto* notLoaded = builder.makeUnary(EqZInt32, status);
    auto* load = builder.makeCall(internalLoadSecondaryModule, {}, Type::none);
    auto* loadIfNeeded = builder.makeIf(notLoaded, load);
    return builder.makeSequence(loadIfNeeded, callIndirect);
  }
  return callIndirect;
}
// Turn references to secondary functions into references to thunks that
// perform a direct call to the original referent. The direct calls in the
// thunks will be handled like all other cross-module calls later, in
// |indirectCallsToSecondaryFunctions|.
void ModuleSplitter::indirectReferencesToSecondaryFunctions() {
  struct Gatherer : public PostWalker<Gatherer> {
    ModuleSplitter& parent;
    Gatherer(ModuleSplitter& parent) : parent(parent) {}
    // Collect RefFuncs in a map from the function name to all RefFuncs that
    // refer to it. We only collect this for secondary funcs.
    InsertOrderedMap<Name, std::vector<RefFunc*>> map;
    void visitRefFunc(RefFunc* curr) {
      Module* currModule = getModule();
      // Add ref.func to the map when
      // 1. ref.func's target func is in one of the secondary modules and
      // 2. the current module is a different module (either the primary module
      //    or a different secondary module)
      if (parent.allSecondaryFuncs.count(curr->func) &&
          (currModule == &parent.primary ||
           parent.secondaries.at(parent.funcToSecondaryIndex.at(curr->func))
               .get() != currModule)) {
        map[curr->func].push_back(curr);
      }
    }
  } gatherer(*this);
  gatherer.walkModule(&primary);
  for (auto& secondaryPtr : secondaries) {
    gatherer.walkModule(secondaryPtr.get());
  }

  // Ignore references to secondary functions that occur in the active segment
  // that will contain the imported placeholders. Indirect calls to table slots
  // initialized by that segment will already go to the right place once the
  // secondary module has been loaded and the table has been patched.
  std::unordered_set<RefFunc*> ignore;
  if (tableManager.activeSegment) {
    for (auto* expr : tableManager.activeSegment->data) {
      if (auto* ref = expr->dynCast<RefFunc>()) {
        ignore.insert(ref);
      }
    }
  }

  // Fix up what we found: for each referenced secondary function, get (or
  // create) its trampoline and point the relevant RefFuncs at it.
  for (auto& [name, refFuncs] : gatherer.map) {
    // Find the relevant (non-ignored) RefFuncs. If there are none, we can skip
    // creating a thunk entirely.
    std::vector<RefFunc*> relevantRefFuncs;
    for (auto* refFunc : refFuncs) {
      assert(refFunc->func == name);
      if (!ignore.count(refFunc)) {
        relevantRefFuncs.push_back(refFunc);
      }
    }
    if (relevantRefFuncs.empty()) {
      continue;
    }
    Name trampoline = getTrampoline(name);
    // Update RefFuncs to refer to it.
    for (auto* refFunc : relevantRefFuncs) {
      refFunc->func = trampoline;
    }
  }
}
void ModuleSplitter::indirectCallsToSecondaryFunctions() {
// Update direct calls of secondary functions to be indirect calls of their
// corresponding table indices instead.
struct CallIndirector : public PostWalker<CallIndirector> {
ModuleSplitter& parent;
CallIndirector(ModuleSplitter& parent) : parent(parent) {}
void visitCall(Call* curr) {
// Return if the call's target is not in one of the secondary module.
if (!parent.allSecondaryFuncs.count(curr->target)) {
return;
}
// Return if the current module is the same module as the call's target,
// because we don't need a call_indirect within the same module.
Module* currModule = getModule();
if (currModule != &parent.primary &&
parent.secondaries.at(parent.funcToSecondaryIndex.at(curr->target))
.get() == currModule) {
return;
}
Builder builder(*getModule());
Index secIndex = parent.funcToSecondaryIndex.at(curr->target);
auto* func = parent.secondaries.at(secIndex)->getFunction(curr->target);
auto tableSlot =
parent.tableManager.getSlot(curr->target, func->type.getHeapType());
replaceCurrent(parent.maybeLoadSecondary(
builder,
builder.makeCallIndirect(tableSlot.tableName,
tableSlot.makeExpr(parent.primary),
curr->operands,
func->type.getHeapType(),
curr->isReturn)));
}
};
CallIndirector callIndirector(*this);
callIndirector.walkModule(&primary);
for (auto& secondaryPtr : secondaries) {
callIndirector.walkModule(secondaryPtr.get());
}
}
void ModuleSplitter::exportImportCalledPrimaryFunctions() {
// Find primary functions called/referred to from the secondary modules.
using CalledPrimaryToModules = std::map<Name, std::set<Module*>>;
for (auto& secondaryPtr : secondaries) {
Module* secondary = secondaryPtr.get();
ModuleUtils::ParallelFunctionAnalysis<CalledPrimaryToModules> callCollector(
*secondary,
[&](Function* func, CalledPrimaryToModules& calledPrimaryToModules) {
struct CallCollector : PostWalker<CallCollector> {
const std::unordered_set<Name>& primaryFuncs;
CalledPrimaryToModules& calledPrimaryToModules;
CallCollector(const std::unordered_set<Name>& primaryFuncs,
CalledPrimaryToModules& calledPrimaryToModules)
: primaryFuncs(primaryFuncs),
calledPrimaryToModules(calledPrimaryToModules) {}
void visitCall(Call* curr) {
if (primaryFuncs.count(curr->target)) {
calledPrimaryToModules[curr->target].insert(getModule());
}
}
void visitRefFunc(RefFunc* curr) {
if (primaryFuncs.count(curr->func)) {
calledPrimaryToModules[curr->func].insert(getModule());
}
}
};
CallCollector(primaryFuncs, calledPrimaryToModules)
.walkFunctionInModule(func, secondary);
});
CalledPrimaryToModules calledPrimaryToModules;
for (auto& [_, map] : callCollector.map) {
calledPrimaryToModules.merge(map);
}
// Ensure each called primary function is exported and imported
for (auto& [func, modules] : calledPrimaryToModules) {
exportImportFunction(func, modules);
}
}
}
void ModuleSplitter::setupTablePatching() {
if (!tableManager.activeTable) {
return;
}
std::map<Module*, std::map<Index, Function*>> moduleToReplacedElems;
// Replace table references to secondary functions with an imported
// placeholder that encodes the table index in its name:
// `importNamespace`.`index`.
forEachElement(
primary, [&](Name table, Name, Index index, Expression*& elem) {
auto* ref = elem->dynCast<RefFunc>();
if (!ref) {
return;
}
if (!allSecondaryFuncs.count(ref->func)) {
return;
}
assert(table == tableManager.activeTable->name);
placeholderMap[table][index] = ref->func;
Index secondaryIndex = funcToSecondaryIndex.at(ref->func);
Module& secondary = *secondaries.at(secondaryIndex);
Name secondaryName = config.secondaryNames.at(secondaryIndex);
auto* secondaryFunc = secondary.getFunction(ref->func);
moduleToReplacedElems[&secondary][index] = secondaryFunc;
if (!config.usePlaceholders) {
// TODO: This can create active element segments with lots of nulls. We
// should optimize them like we do data segments with zeros.
elem = Builder(primary).makeRefNull(HeapType::nofunc);
return;
}
auto placeholder = std::make_unique<Function>();
placeholder->module = config.placeholderNamespacePrefix.toString() + "." +
secondaryName.toString();
placeholder->base = std::to_string(index);
placeholder->name = Names::getValidFunctionName(
primary, std::string("placeholder_") + placeholder->base.toString());
placeholder->hasExplicitName = true;
placeholder->type = secondaryFunc->type.with(Inexact);
elem = Builder(primary).makeRefFunc(placeholder->name, placeholder->type);
primary.addFunction(std::move(placeholder));
});
if (moduleToReplacedElems.size() == 0) {
// No placeholders to patch out of the table
return;
}
for (auto& [secondaryPtr, replacedElems] : moduleToReplacedElems) {
Module& secondary = *secondaryPtr;
auto secondaryTable =
ModuleUtils::copyTable(tableManager.activeTable, secondary);
if (tableManager.activeBase.global.size()) {
assert(tableManager.activeTableSegments.size() == 1 &&
"Unexpected number of segments with non-const base");
assert(secondary.tables.size() == 1 && secondary.elementSegments.empty());
// Since addition is not currently allowed in initializer expressions, we
// need to start the new secondary segment where the primary segment
// starts. The secondary segment will contain the same primary functions
// as the primary module except in positions where it needs to overwrite a
// placeholder function. All primary functions in the table therefore need
// to be imported into the second module. TODO: use better strategies
// here, such as using ref.func in the start function or standardizing
// addition in initializer expressions.
ElementSegment* primarySeg = tableManager.activeTableSegments.front();
std::vector<Expression*> secondaryElems;
secondaryElems.reserve(primarySeg->data.size());
// Copy functions from the primary segment to the secondary segment,
// replacing placeholders and creating new exports and imports as
// necessary.
auto replacement = replacedElems.begin();
for (Index i = 0;
i < primarySeg->data.size() && replacement != replacedElems.end();
++i) {
if (replacement->first == i) {
// primarySeg->data[i] is a placeholder, so use the secondary
// function.
auto* func = replacement->second;
auto* ref = Builder(secondary).makeRefFunc(func->name, func->type);
secondaryElems.push_back(ref);
++replacement;
} else if (auto* get = primarySeg->data[i]->dynCast<RefFunc>()) {
exportImportFunction(get->func, {&secondary});
auto* copied =
ExpressionManipulator::copy(primarySeg->data[i], secondary);
secondaryElems.push_back(copied);
}
}
auto offset = ExpressionManipulator::copy(primarySeg->offset, secondary);
auto secondarySeg = std::make_unique<ElementSegment>(
secondaryTable->name, offset, secondaryTable->type, secondaryElems);
secondarySeg->setName(primarySeg->name, primarySeg->hasExplicitName);
secondary.addElementSegment(std::move(secondarySeg));
return;
}
// Create active table segments in the secondary module to patch in the
// original functions when it is instantiated.
Index currBase = replacedElems.begin()->first;
std::vector<Expression*> currData;
auto finishSegment = [&]() {
auto* offset = Builder(secondary).makeConst(
Literal::makeFromInt32(currBase, secondaryTable->addressType));
auto secondarySeg = std::make_unique<ElementSegment>(
secondaryTable->name, offset, secondaryTable->type, currData);
Name name = Names::getValidElementSegmentName(
secondary, Name::fromInt(secondary.elementSegments.size()));
secondarySeg->setName(name, false);
secondary.addElementSegment(std::move(secondarySeg));
};
for (auto curr = replacedElems.begin(); curr != replacedElems.end();
++curr) {
if (curr->first != currBase + currData.size()) {
finishSegment();
currBase = curr->first;
currData.clear();
}
auto* func = curr->second;
currData.push_back(
Builder(secondary).makeRefFunc(func->name, func->type));
}
if (currData.size()) {
finishSegment();
}
}
}
void ModuleSplitter::shareImportableItems() {
// Map internal names to (one of) their corresponding export names. Don't
// consider functions because they have already been imported and exported as
// necessary.
std::unordered_map<std::pair<ExternalKind, Name>, Name> exports;
for (auto& ex : primary.exports) {
if (ex->kind != ExternalKind::Function) {
if (auto* name = ex->getInternalName()) {
exports[std::make_pair(ex->kind, *name)] = ex->name;
}
}
}
auto makeImportExport = [&](Importable& primaryItem,
Importable& secondaryItem,
const std::string& genericExportName,
ExternalKind kind) {
secondaryItem.name = primaryItem.name;
secondaryItem.hasExplicitName = primaryItem.hasExplicitName;
secondaryItem.module = config.importNamespace;
auto exportIt = exports.find(std::make_pair(kind, primaryItem.name));
if (exportIt != exports.end()) {
secondaryItem.base = exportIt->second;
} else {
std::string baseName =
config.newExportPrefix + (config.minimizeNewExportNames
? minified.getName()
: genericExportName);
Name exportName = Names::getValidExportName(primary, baseName);
primary.addExport(new Export(exportName, kind, primaryItem.name));
secondaryItem.base = exportName;
exports[std::make_pair(kind, primaryItem.name)] = exportName;
}
};
// TODO: Be more selective by only sharing global items that are actually used
// in the secondary module, just like we do for functions.
for (auto& secondaryPtr : secondaries) {
Module& secondary = *secondaryPtr;
for (auto& memory : primary.memories) {
auto secondaryMemory = ModuleUtils::copyMemory(memory.get(), secondary);
makeImportExport(
*memory, *secondaryMemory, "memory", ExternalKind::Memory);
}
for (auto& table : primary.tables) {
auto secondaryTable = secondary.getTableOrNull(table->name);
if (!secondaryTable) {
secondaryTable = ModuleUtils::copyTable(table.get(), secondary);
}
makeImportExport(*table, *secondaryTable, "table", ExternalKind::Table);
}
for (auto& global : primary.globals) {
if (global->mutable_) {
assert(primary.features.hasMutableGlobals() &&
"TODO: add wrapper functions for disallowed mutable globals");
}
auto secondaryGlobal = std::make_unique<Global>();
secondaryGlobal->type = global->type;
secondaryGlobal->mutable_ = global->mutable_;
secondaryGlobal->init =
global->init == nullptr
? nullptr
: ExpressionManipulator::copy(global->init, secondary);
makeImportExport(
*global, *secondaryGlobal, "global", ExternalKind::Global);
secondary.addGlobal(std::move(secondaryGlobal));
}
for (auto& tag : primary.tags) {
auto secondaryTag = std::make_unique<Tag>();
secondaryTag->type = tag->type;
makeImportExport(*tag, *secondaryTag, "tag", ExternalKind::Tag);
secondary.addTag(std::move(secondaryTag));
}
}
}
void ModuleSplitter::removeUnusedSecondaryElements() {
// TODO: It would be better to be more selective about only exporting and
// importing those items that the secondary module needs. This would reduce
// code size in the primary module as well.
for (auto& secondaryPtr : secondaries) {
PassRunner runner(secondaryPtr.get());
// Do not validate here in the middle, as the IR still needs updating later.
runner.options.validate = false;
runner.add("remove-unused-module-elements");
runner.run();
}
}
} // anonymous namespace
// Splits `primary` according to `config`. All of the work happens while the
// splitter is constructed; the resulting secondary modules and placeholder map
// are then moved out of it and returned.
Results splitFunctions(Module& primary, const Config& config) {
  ModuleSplitter splitter(primary, config);
  return {std::move(splitter.secondaries), std::move(splitter.placeholderMap)};
}
} // namespace wasm::ModuleSplitting