blob: d20df370872f258903a4313e1c7b66b7b3532a48 [file] [log] [blame] [edit]
/*
* Copyright 2025 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ir/module-utils.h"
#include "pass.h"
#include "support/file.h"
#include "support/json.h"
#include "wasm.h"
namespace wasm {
constexpr Index NodeTypeIndex = 0;
constexpr Index NodeNameIndex = 1;
constexpr Index NodeIdIndex = 2;
constexpr Index NodeSelfSizeIndex = 3;
constexpr Index NodeEdgeCountIndex = 4;
constexpr Index NodeDetachednessIndex = 5;
constexpr Index NodeFieldCount = 6;
constexpr Index EdgeTypeIndex = 0;
constexpr Index EdgeNameIndex = 1;
constexpr Index EdgeDestIndex = 2;
constexpr Index EdgeFieldCount = 3;
constexpr Index DisplayedTypesCount = 10;
struct Node {
IString name;
Index size;
std::vector<Index> edges;
};
struct HeapSnapshotAnalysis : Pass {
void run(Module* wasm) override {
auto snapshotFile =
getArgument("heap-snapshot-analysis",
"Heap snapshot analysis requires heap shapshot "
"file argument [--heap-snapshot-analysis=FILE]");
auto snapshotData = read_file<std::vector<char>>(snapshotFile, Flags::Text);
std::cout << "snapshot length " << snapshotData.size() << "\n";
// TODO: We would prefer UTF-8.
json::Value snapshot;
snapshot.parse(snapshotData.data(), json::Value::StringEncoding::ASCII);
std::cout << "parsed snapshot\n";
validateSnapshotMeta(snapshot);
auto nodes = getGraph(snapshot);
Index totalHeapSize = 0;
Index totalWasmHeapSize = 0;
Index totalWasmAllocations = 0;
std::unordered_map<IString, Index> heapSizeByType;
std::unordered_map<IString, Index> allocationsByType;
for (auto& node : nodes) {
totalHeapSize += node.size;
if (maybeGetWasmTypeIndex(node.name)) {
totalWasmHeapSize += node.size;
heapSizeByType[node.name] += node.size;
++totalWasmAllocations;
++allocationsByType[node.name];
}
}
auto sortData = [](auto begin, auto end) {
std::vector<std::pair<IString, Index>> pairs(begin, end);
std::sort(pairs.begin(), pairs.end(), [](auto a, auto b) {
if (a.second != b.second) {
return a.second > b.second;
}
return a.first < b.first;
});
return pairs;
};
auto topTypesByHeapSize =
sortData(heapSizeByType.begin(), heapSizeByType.end());
auto topTypesByAllocations =
sortData(allocationsByType.begin(), allocationsByType.end());
std::cout << "total heap size is " << totalHeapSize << "\n";
std::cout << "wasm heap size is " << totalWasmHeapSize << " ("
<< (double(totalWasmHeapSize) / double(totalHeapSize) * 100)
<< "%)\n";
std::cout << "there are " << totalWasmAllocations
<< " wasm objects (average "
<< (double(totalWasmHeapSize) / double(totalWasmAllocations))
<< " bytes per object)\n";
std::cout << "\ntop types by total heap size:\n";
for (Index i = 0; i < DisplayedTypesCount; ++i) {
if (i >= topTypesByHeapSize.size()) {
break;
}
auto [name, size] = topTypesByHeapSize[i];
std::cout << " " << name << ": " << size << " bytes\n";
}
std::cout << "\ntop types by allocations:\n";
for (Index i = 0; i < DisplayedTypesCount; ++i) {
if (i >= topTypesByAllocations.size()) {
break;
}
auto [name, allocs] = topTypesByAllocations[i];
std::cout << " " << name << ": " << allocs << " objects\n";
}
// Measure the savings of removing vtable pointers. Assume any object that
// contains an edge to another object that contains an edge to a "system /
// WasmFuncRef" contains a vtable pointer.
IString funcrefName = "system / WasmFuncRef";
std::unordered_set<Index> vtableIndices;
for (Index i = 0; i < nodes.size(); ++i) {
for (Index j = 0; j < nodes[i].edges.size(); ++j) {
auto dest = nodes[i].edges[j];
if (dest >= nodes.size()) {
Fatal() << "Unexpected out-of-bounds edge (" << dest
<< " >= " << nodes.size() << ")";
}
if (nodes[dest].name == funcrefName) {
vtableIndices.insert(i);
break;
}
}
}
Index objectsWithVtableCount = 0;
for (Index i = 0; i < nodes.size(); ++i) {
for (Index j = 0; j < nodes[i].edges.size(); ++j) {
auto dest = nodes[i].edges[j];
assert(dest < nodes.size());
if (vtableIndices.count(dest)) {
++objectsWithVtableCount;
break;
}
}
}
Index vtableSavings = objectsWithVtableCount * 4;
std::cout << "\nRemoving vtable fields would save " << vtableSavings
<< " bytes ("
<< (double(vtableSavings) / double(totalWasmHeapSize) * 100)
<< "% wasm heap, "
<< (double(vtableSavings) / double(totalHeapSize) * 100)
<< "% total heap)\n";
}
void validateSnapshotMeta(json::Value& snapshotRoot) {
auto snapshot = snapshotRoot.maybeGet("snapshot");
if (!snapshot) {
Fatal() << "Expected .snapshot to exist";
}
auto meta = snapshot->maybeGet("meta");
if (!meta) {
Fatal() << "Expected .snapshot.meta to exist";
}
validateNodeFields(meta);
validateEdgeFields(meta);
}
void validateNodeFields(json::Ref& meta) {
auto nodeFields = meta->maybeGet("node_fields");
if (!nodeFields) {
Fatal() << "Expected .snapshot.meta.node_fields to exist";
}
if (!nodeFields->isArray()) {
Fatal() << "Expected .snapshot.meta.node_fields to be an array";
}
auto& nodeFieldsArray = nodeFields->getArray();
auto expectNodeField = [&](unsigned i, IString field) {
if (nodeFieldsArray.size() <= i || !nodeFieldsArray[i]->isString() ||
nodeFieldsArray[i]->getIString() != field) {
Fatal() << "Expected .snapshot.meta.node_fields[" << i << "] to be \""
<< field << '"';
}
};
expectNodeField(NodeTypeIndex, "type");
expectNodeField(NodeNameIndex, "name");
expectNodeField(NodeIdIndex, "id");
expectNodeField(NodeSelfSizeIndex, "self_size");
expectNodeField(NodeEdgeCountIndex, "edge_count");
expectNodeField(NodeDetachednessIndex, "detachedness");
if (nodeFieldsArray.size() != NodeFieldCount) {
Fatal() << "Unexpected fields in .snapshot.meta.node_fields";
}
}
void validateEdgeFields(json::Ref& meta) {
json::Ref edgeFields = meta->maybeGet("edge_fields");
if (!edgeFields) {
Fatal() << "Expected .snapshot.meta.edge_fields to exist";
}
if (!edgeFields->isArray()) {
Fatal() << "Expected .snapshot.meta.edge_fields to be an array";
}
auto& edgeFieldsArray = edgeFields->getArray();
auto expectEdgeField = [&](unsigned i, IString field) {
if (edgeFieldsArray.size() <= i || !edgeFieldsArray[i]->isString() ||
edgeFieldsArray[i]->getIString() != field) {
Fatal() << "Expected .snapshot.meta.edge_fields[" << i << "] to be \""
<< field << '"';
}
};
expectEdgeField(EdgeTypeIndex, "type");
expectEdgeField(EdgeNameIndex, "name_or_index");
expectEdgeField(EdgeDestIndex, "to_node");
if (edgeFieldsArray.size() != EdgeFieldCount) {
Fatal() << "Unexpected fields in .snapshot.meta.edge_fields";
}
}
std::vector<Node> getGraph(json::Value& snapshotRoot) {
auto nodes = getNodes(snapshotRoot);
auto edges = getEdges(snapshotRoot);
auto strings = getStrings(snapshotRoot);
if (nodes.size() % NodeFieldCount != 0) {
Fatal() << "Expected length of .nodes to be a multiple of "
<< NodeFieldCount;
}
if (edges.size() % EdgeFieldCount != 0) {
Fatal() << "Expected length of .edges to be a multiple of "
<< EdgeFieldCount;
}
std::vector<Node> results;
Index currNode = 0, currEdge = 0;
for (; currNode < nodes.size(); currNode += NodeFieldCount) {
Index nameIndex = nodes[currNode + NodeNameIndex];
if (nameIndex >= strings.size()) {
Fatal() << "Node name index " << nameIndex
<< " is out of bounds at .nodes[" << (currNode + NodeNameIndex)
<< ']';
}
IString name = strings[nameIndex];
Index size = nodes[currNode + NodeSelfSizeIndex];
Index edgeCount = nodes[currNode + NodeEdgeCountIndex];
Index edgeFieldCount = edgeCount * EdgeFieldCount;
if (edgeFieldCount < edgeCount || edgeFieldCount > edges.size() ||
currEdge > edges.size() - edgeFieldCount) {
Fatal() << "Edge count " << edgeCount << " is out of bounds at .nodes["
<< (currNode + NodeEdgeCountIndex) << ']';
}
std::vector<Index> destinations;
destinations.reserve(edgeCount);
for (Index i = 0; i < edgeCount; ++i) {
Index dest = edges[currEdge + EdgeDestIndex];
if (dest % NodeFieldCount != 0) {
Fatal() << "Expected to_node index to be a multiple of "
<< NodeFieldCount << "\n";
}
dest /= NodeFieldCount;
destinations.push_back(dest);
currEdge += EdgeFieldCount;
}
results.emplace_back(Node{name, size, std::move(destinations)});
}
if (currEdge != edges.size()) {
Fatal() << "Unexpected extra edges";
}
return results;
}
std::vector<Index> getNodes(json::Value& snapshotRoot) {
auto nodes = snapshotRoot.maybeGet("nodes");
if (!nodes) {
Fatal() << "Expected .nodes to exist";
}
return getIndices(nodes, ".nodes");
}
std::vector<Index> getEdges(json::Value& snapshotRoot) {
auto edges = snapshotRoot.maybeGet("edges");
if (!edges) {
Fatal() << "Expected .edges to exist";
}
return getIndices(edges, ".edges");
}
std::vector<Index> getIndices(json::Ref& array, const char* name) {
if (!array->isArray()) {
Fatal() << "Expected " << name << " to be an array";
}
auto& refs = array->getArray();
std::vector<Index> results;
for (size_t i = 0; i < refs.size(); ++i) {
if (!refs[i]->isNumber()) {
Fatal() << "Expected " << name << "[" << i << "] to be a number";
}
// TODO: Bounds checks, etc.
results.push_back(Index(refs[i]->getNumber()));
}
return results;
}
std::vector<IString> getStrings(json::Value& snapshotRoot) {
auto strings = snapshotRoot.maybeGet("strings");
if (!strings) {
Fatal() << "Expected .strings to exist";
}
if (!strings->isArray()) {
Fatal() << "Expected .strings to be an array";
}
auto& stringRefs = strings->getArray();
std::vector<IString> results;
for (size_t i = 0; i < stringRefs.size(); ++i) {
if (!stringRefs[i]->isString()) {
Fatal() << "Expected .strings[" << i << "] to be a string";
}
results.push_back(stringRefs[i]->getIString());
}
return results;
}
std::optional<Index> maybeGetWasmTypeIndex(IString name) {
// Nodes for Wasm heap objects have names like `$canonNN (wasm)`. Parse
// and return the NN as an index if the name matches that format.
constexpr std::string_view prefix = "$canon";
constexpr std::string_view suffix = " (wasm)";
// TODO: Use C++20 `starts_with`
if (name.str.substr(0, prefix.size()) != prefix) {
return std::nullopt;
}
if (name.str.substr(name.str.size() - suffix.size()) != suffix) {
return std::nullopt;
}
size_t count = name.str.size() - prefix.size() - suffix.size();
std::string_view index = name.str.substr(prefix.size(), count);
Index result = 0;
for (char c : index) {
if (!std::isdigit(c)) {
return std::nullopt;
}
result = result * 10 + (c - '0');
}
return result;
}
};
Pass* createHeapSnapshotAnalysisPass() { return new HeapSnapshotAnalysis(); }
} // namespace wasm