// blob: 4804322b61924edd5973fadc6582c861d5b62fad [file] [log] [blame] [edit]
/*
 * Copyright 2015 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// wasm.h: WebAssembly representation and processing library, in one
// header file.
// This represents WebAssembly in an AST format, with a focus on making
// it easy to not just inspect but also to process. For example, some
// things that this enables are:
// * Interpreting: See wasm-interpreter.h.
// * Optimizing: See asm2wasm.h, which performs some optimizations
// after code generation.
// * Validation: See wasm-validator.h.
// * Pretty-printing: See Print.cpp.
// wasm.js internal WebAssembly representation design:
// * Unify where possible. Where size isn't a concern, combine
// classes, so binary ops and relational ops are joined. This
// simplifies the AST and makes traversals easier.
// * Optimize for size? This might justify separating if and if_else
// (so that if doesn't have an always-empty else; also it avoids
// a branch).
#ifndef wasm_wasm_h
#define wasm_wasm_h
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "literal.h"
#include "mixed_arena.h"
#include "support/name.h"
#include "wasm-type.h"
namespace wasm {
// Index type used for entities inside a wasm module (an unsigned 32-bit value).
using Index = uint32_t;
// An address in linear memory. For now only wasm32.
//
// Thin wrapper around a 32-bit unsigned value. It can be built or assigned
// from a 64-bit integer; in builds with assertions enabled, a value that does
// not fit in address_t trips an assert instead of being silently truncated.
struct Address {
  typedef uint32_t address_t;
  address_t addr;

  // Default address is 0.
  Address() : addr(0) {}

  // Implicit on purpose: lets integer expressions be used where an Address is
  // expected. Asserts that `a` fits in address_t.
  Address(uint64_t a) : addr(static_cast<address_t>(a)) {
    assert(a <= std::numeric_limits<address_t>::max());
  }

  // Assign from a raw integer, with the same range assertion as the
  // converting constructor.
  Address& operator=(uint64_t a) {
    assert(a <= std::numeric_limits<address_t>::max());
    addr = static_cast<address_t>(a);
    return *this;
  }

  // Implicit read access to the underlying value.
  operator address_t() const { return addr; }

  // Pre-increment; no overflow check is performed.
  Address& operator++() {
    ++addr;
    return *this;
  }
};
// Signed 32-bit offset into memory.
using Offset = int32_t;
// Types
// Operators
// Unary operators, one enumerator per (operation, operand type) combination.
// Numeric values are assigned implicitly from declaration order, starting at 0.
enum UnaryOp {
  // int
  ClzInt32, ClzInt64, CtzInt32, CtzInt64, PopcntInt32, PopcntInt64,
  // float
  NegFloat32, NegFloat64, AbsFloat32, AbsFloat64, CeilFloat32, CeilFloat64,
  FloorFloat32, FloorFloat64, TruncFloat32, TruncFloat64,
  NearestFloat32, NearestFloat64, SqrtFloat32, SqrtFloat64,
  // relational
  EqZInt32, EqZInt64,
  // conversions
  ExtendSInt32, ExtendUInt32, // extend i32 to i64
  WrapInt64, // i64 to i32
  // float to int
  TruncSFloat32ToInt32, TruncSFloat32ToInt64, TruncUFloat32ToInt32, TruncUFloat32ToInt64,
  TruncSFloat64ToInt32, TruncSFloat64ToInt64, TruncUFloat64ToInt32, TruncUFloat64ToInt64,
  ReinterpretFloat32, ReinterpretFloat64, // reinterpret bits to int
  // int to float
  ConvertSInt32ToFloat32, ConvertSInt32ToFloat64, ConvertUInt32ToFloat32, ConvertUInt32ToFloat64,
  ConvertSInt64ToFloat32, ConvertSInt64ToFloat64, ConvertUInt64ToFloat32, ConvertUInt64ToFloat64,
  PromoteFloat32, // f32 to f64
  DemoteFloat64, // f64 to f32
  ReinterpretInt32, ReinterpretInt64 // reinterpret bits to float
};
// Binary operators, one enumerator per (operation, operand type) combination,
// grouped by operand type: Int32, Int64, Float32, Float64. Arithmetic and
// relational operators share this one enum. Numeric values are assigned
// implicitly from declaration order, starting at 0.
enum BinaryOp {
  // ---- Int32 ----
  AddInt32, SubInt32, MulInt32, // int or float
  DivSInt32, DivUInt32, RemSInt32, RemUInt32, AndInt32, OrInt32, XorInt32,
  ShlInt32, ShrUInt32, ShrSInt32, RotLInt32, RotRInt32, // int
  // relational ops
  EqInt32, NeInt32, // int or float
  LtSInt32, LtUInt32, LeSInt32, LeUInt32, GtSInt32, GtUInt32, GeSInt32, GeUInt32, // int

  // ---- Int64 ----
  AddInt64, SubInt64, MulInt64, // int or float
  DivSInt64, DivUInt64, RemSInt64, RemUInt64, AndInt64, OrInt64, XorInt64,
  ShlInt64, ShrUInt64, ShrSInt64, RotLInt64, RotRInt64, // int
  // relational ops
  EqInt64, NeInt64, // int or float
  LtSInt64, LtUInt64, LeSInt64, LeUInt64, GtSInt64, GtUInt64, GeSInt64, GeUInt64, // int

  // ---- Float32 ----
  AddFloat32, SubFloat32, MulFloat32, // int or float
  DivFloat32, CopySignFloat32, MinFloat32, MaxFloat32, // float
  // relational ops
  EqFloat32, NeFloat32, // int or float
  LtFloat32, LeFloat32, GtFloat32, GeFloat32, // float

  // ---- Float64 ----
  AddFloat64, SubFloat64, MulFloat64, // int or float
  DivFloat64, CopySignFloat64, MinFloat64, MaxFloat64, // float
  // relational ops
  EqFloat64, NeFloat64, // int or float
  LtFloat64, LeFloat64, GtFloat64, GeFloat64 // float
};
// Operations selectable on a Host node (see Host::op).
enum HostOp {
  PageSize, CurrentMemory, GrowMemory, HasFeature
};
// Expressions
// Note that little is provided in terms of constructors for these. The rationale
// is that writing new Something(a, b, c, d, e) is not the clearest, and it would
// be better to write new Something(name=a, leftOperand=b... etc., but C++
// lacks named operands, so in asm2wasm etc. you will see things like
// auto x = new Something();
// x->name = a;
// x->leftOperand = b;
// ..
// which is less compact but less ambiguous. See wasm-builder.h for a more
// friendly API for building nodes.
// Most nodes have no need of internal allocation, and when arena-allocated
// they drop the provided arena on the floor. You can create random instances
// of those that are not in an arena without issue. However, the nodes that
// have internal allocation will need an allocator provided to them in order
// to be constructed.
class Expression {
enum Id {
InvalidId = 0,
Id _id;
WasmType type; // the type of the expression: its *output*, not necessarily its input(s)
Expression(Id id) : _id(id), type(none) {}
void finalize() {}
template<class T>
bool is() {
return int(_id) == int(T::SpecificId);
template<class T>
T* dynCast() {
return int(_id) == int(T::SpecificId) ? (T*)this : nullptr;
template<class T>
T* cast() {
assert(int(_id) == int(T::SpecificId));
return (T*)this;
// Returns a name string for the given expression (defined out of line).
const char* getExpressionName(Expression* curr);

// Arena-backed list of child expression pointers.
using ExpressionList = ArenaVector<Expression*>;
// Binds a concrete node class to its Expression::Id. The id is exposed at
// compile time as SpecificId and handed to the Expression base on construction.
template<Expression::Id SID>
class SpecificExpression : public Expression {
public:
  enum {
    SpecificId = SID // compile-time access to the type for the class
  };

  SpecificExpression() : Expression(SID) {}
};
// Nop node: has no fields beyond the Expression base.
class Nop : public SpecificExpression<Expression::NopId> {
public:
  Nop() {}
  // The arena argument is accepted but unused; this node allocates nothing.
  Nop(MixedArena& allocator) {}
};
// Block node: an optionally named list of child expressions. The list is
// arena-backed, so a Block can only be constructed with an allocator.
class Block : public SpecificExpression<Expression::BlockId> {
public:
  Block(MixedArena& allocator) : list(allocator) {}

  Name name;
  ExpressionList list;

  // set the type given you know its type, which is the case when parsing
  // s-expression or binary, as explicit types are given. the only additional work
  // this does is to set the type to unreachable in the cases that is needed.
  void finalize(WasmType type_);

  // set the type purely based on its contents. this scans the block, so it is not fast
  void finalize();
};
// If node: a condition, an ifTrue arm and an ifFalse arm. All three pointers
// start as nullptr; ifFalse is the one the original constructor explicitly
// nulled.
class If : public SpecificExpression<Expression::IfId> {
public:
  If() {}
  // The arena argument is accepted but unused.
  If(MixedArena& allocator) : If() {}

  // Initialised to nullptr so an unpopulated node never holds indeterminate
  // pointers.
  Expression* condition = nullptr;
  Expression* ifTrue = nullptr;
  Expression* ifFalse = nullptr;

  // set the type given you know its type, which is the case when parsing
  // s-expression or binary, as explicit types are given. the only additional work
  // this does is to set the type to unreachable in the cases that is needed.
  void finalize(WasmType type_);

  // set the type purely based on its contents.
  void finalize();
};
// Loop node: an optionally named body expression.
class Loop : public SpecificExpression<Expression::LoopId> {
public:
  Loop() {}
  // The arena argument is accepted but unused.
  Loop(MixedArena& allocator) {}

  Name name;
  // nullptr until the builder assigns the body.
  Expression* body = nullptr;

  // set the type given you know its type, which is the case when parsing
  // s-expression or binary, as explicit types are given. the only additional work
  // this does is to set the type to unreachable in the cases that is needed.
  void finalize(WasmType type_);

  // set the type purely based on its contents.
  void finalize();
};
// Break node: a target name plus an optional value and an optional condition,
// both nullptr by default.
class Break : public SpecificExpression<Expression::BreakId> {
public:
  Break() : value(nullptr), condition(nullptr) {}
  // Only the arena constructor sets the type to unreachable; the default
  // constructor leaves the base-class default in place.
  Break(MixedArena& allocator) : Break() {
    type = unreachable;
  }

  Name name;
  Expression* value;
  Expression* condition;

  void finalize();
};
// Switch node: a list of target names, a default target, a condition and an
// optional value. The target list is arena-backed, so an allocator is
// required. The type is set to unreachable on construction.
class Switch : public SpecificExpression<Expression::SwitchId> {
public:
  Switch(MixedArena& allocator) : targets(allocator), condition(nullptr), value(nullptr) {
    type = unreachable;
  }

  ArenaVector<Name> targets;
  Name default_;
  Expression* condition;
  Expression* value;
};
// Call node: operand expressions plus the name of the call target. The
// operand list is arena-backed, so an allocator is required.
class Call : public SpecificExpression<Expression::CallId> {
public:
  Call(MixedArena& allocator) : operands(allocator) {}

  ExpressionList operands;
  Name target;
};
// CallImport node: same layout as Call (operands plus a target name), but
// tagged with its own id. The operand list is arena-backed.
class CallImport : public SpecificExpression<Expression::CallImportId> {
public:
  CallImport(MixedArena& allocator) : operands(allocator) {}

  ExpressionList operands;
  Name target;
};
// A named function signature: one result type and a list of parameter types.
class FunctionType {
public:
  Name name;
  WasmType result;
  std::vector<WasmType> params;

  // The result defaults to `none`.
  FunctionType() : result(none) {}

  // Comparison helpers, defined out of line.
  // NOTE(review): structuralComparison presumably ignores `name` - confirm
  // against the definition.
  bool structuralComparison(FunctionType& b);

  bool operator==(FunctionType& b);
  bool operator!=(FunctionType& b);
};
// CallIndirect node: operand expressions, the name of the function type
// (fullType), and an expression that yields the call target. The operand
// list is arena-backed, so an allocator is required.
class CallIndirect : public SpecificExpression<Expression::CallIndirectId> {
public:
  CallIndirect(MixedArena& allocator) : operands(allocator) {}

  ExpressionList operands;
  Name fullType;
  // nullptr until the builder assigns the target expression.
  Expression* target = nullptr;
};
// GetLocal node: refers to a local by index.
class GetLocal : public SpecificExpression<Expression::GetLocalId> {
public:
  GetLocal() {}
  // The arena argument is accepted but unused.
  GetLocal(MixedArena& allocator) {}

  Index index;
};
// SetLocal node: a local index plus the value expression to store.
class SetLocal : public SpecificExpression<Expression::SetLocalId> {
public:
  SetLocal() {}
  // The arena argument is accepted but unused.
  SetLocal(MixedArena& allocator) {}

  Index index;
  // nullptr until the builder assigns the value.
  Expression* value = nullptr;

  // Tee query/toggle, defined out of line.
  bool isTee();
  void setTee(bool is);
};
// GetGlobal node: refers to a global by name.
class GetGlobal : public SpecificExpression<Expression::GetGlobalId> {
public:
  GetGlobal() {}
  // The arena argument is accepted but unused.
  GetGlobal(MixedArena& allocator) {}

  Name name;
};
// SetGlobal node: a global name plus the value expression to store.
class SetGlobal : public SpecificExpression<Expression::SetGlobalId> {
public:
  SetGlobal() {}
  // The arena argument is accepted but unused.
  SetGlobal(MixedArena& allocator) {}

  Name name;
  // nullptr until the builder assigns the value.
  Expression* value = nullptr;
};
// Load node: access width in bytes, signedness flag, offset, alignment and
// the pointer expression.
class Load : public SpecificExpression<Expression::LoadId> {
public:
  Load() {}
  // The arena argument is accepted but unused.
  Load(MixedArena& allocator) {}

  uint8_t bytes;
  bool signed_;
  Address offset;
  Address align;
  // nullptr until the builder assigns the pointer expression.
  Expression* ptr = nullptr;

  // type must be set during creation, cannot be inferred
};
// Store node: access width in bytes, offset, alignment, the pointer
// expression and the value expression. valueType defaults to `none`.
class Store : public SpecificExpression<Expression::StoreId> {
public:
  Store() : valueType(none) {}
  // The arena argument is accepted but unused.
  Store(MixedArena& allocator) : Store() {}

  uint8_t bytes;
  Address offset;
  Address align;
  // Both operands are nullptr until the builder assigns them.
  Expression* ptr = nullptr;
  Expression* value = nullptr;
  WasmType valueType; // the store never returns a value

  void finalize();
};
// Const node: wraps a Literal value.
class Const : public SpecificExpression<Expression::ConstId> {
public:
  Const() {}
  // The arena argument is accepted but unused.
  Const(MixedArena& allocator) {}

  Literal value;

  // Setter defined out of line. It returns a Const*, presumably `this` for
  // chaining - confirm against the definition.
  Const* set(Literal value_);
};
// Unary node: a UnaryOp applied to a single value expression.
class Unary : public SpecificExpression<Expression::UnaryId> {
public:
  Unary() {}
  // The arena argument is accepted but unused.
  Unary(MixedArena& allocator) {}

  UnaryOp op;
  // nullptr until the builder assigns the operand.
  Expression* value = nullptr;

  bool isRelational();

  void finalize();
};
// Binary node: a BinaryOp applied to left and right operand expressions.
class Binary : public SpecificExpression<Expression::BinaryId> {
public:
  Binary() {}
  // The arena argument is accepted but unused.
  Binary(MixedArena& allocator) {}

  BinaryOp op;
  // Both operands are nullptr until the builder assigns them.
  Expression* left = nullptr;
  Expression* right = nullptr;

  // the type is always the type of the operands,
  // except for relationals

  bool isRelational();

  void finalize();
};
// Select node: ifTrue and ifFalse value expressions plus a condition.
class Select : public SpecificExpression<Expression::SelectId> {
public:
  Select() {}
  // The arena argument is accepted but unused.
  Select(MixedArena& allocator) {}

  // All three operands are nullptr until the builder assigns them.
  Expression* ifTrue = nullptr;
  Expression* ifFalse = nullptr;
  Expression* condition = nullptr;

  void finalize();
};
// Drop node: wraps a single value expression.
class Drop : public SpecificExpression<Expression::DropId> {
public:
  Drop() {}
  // The arena argument is accepted but unused.
  Drop(MixedArena& allocator) {}

  // nullptr until the builder assigns the value.
  Expression* value = nullptr;
};
// Return node: an optional value (nullptr by default). The type is set to
// unreachable on construction.
class Return : public SpecificExpression<Expression::ReturnId> {
public:
  Return() : value(nullptr) {
    type = unreachable;
  }
  // The arena argument is accepted but unused.
  Return(MixedArena& allocator) : Return() {}

  Expression* value;
};
// Host node: a HostOp plus a name operand and a list of expression operands.
// The operand list is arena-backed, so an allocator is required.
class Host : public SpecificExpression<Expression::HostId> {
public:
  Host(MixedArena& allocator) : operands(allocator) {}

  HostOp op;
  Name nameOperand;
  ExpressionList operands;

  void finalize();
};
// Unreachable node: no fields; its type is set to unreachable on construction.
class Unreachable : public SpecificExpression<Expression::UnreachableId> {
public:
  Unreachable() {
    type = unreachable;
  }
  // The arena argument is accepted but unused.
  Unreachable(MixedArena& allocator) : Unreachable() {}
};
// Globals
class Function {
Name name;
WasmType result;
std::vector<WasmType> params; // function locals are
std::vector<WasmType> vars; // params plus vars
Name type; // if null, it is implicit in params and result
Expression* body;
// local names. these are optional.
std::vector<Name> localNames;
std::map<Name, Index> localIndices;
struct DebugLocation {
uint32_t fileIndex, lineNumber;
std::unordered_map<Expression*, DebugLocation> debugLocations;
Function() : result(none) {}
size_t getNumParams();
size_t getNumVars();
size_t getNumLocals();
bool isParam(Index index);
bool isVar(Index index);
Name getLocalName(Index index);
Index getLocalIndex(Name name);
Index getVarIndexBase();
WasmType getLocalType(Index index);
Name getLocalNameOrDefault(Index index);
bool hasLocalName(Index index) const;
// Kind of entity an Import or Export refers to. The numeric values are
// spelled out explicitly.
enum class ExternalKind {
  Function = 0,
  Table = 1,
  Memory = 2,
  Global = 3
};
// An imported entity: its internal name, the module/base pair it comes from,
// its kind, and kind-specific type information.
class Import {
public:
  // globalType defaults to `none`.
  Import() : globalType(none) {}

  Name name, module, base; // name = module.base
  ExternalKind kind;
  Name functionType; // for Function imports
  WasmType globalType; // for Global imports
};
// An exported entity: maps an exported name to an internal name of some kind.
class Export {
public:
  Name name; // exported name - note that this is the key, as the internal name is non-unique (can have multiple exports for an internal, also over kinds)
  Name value; // internal name
  ExternalKind kind;
};
class Table {
static const Address::address_t kPageSize = 1;
static const Index kMaxSize = Index(-1);
struct Segment {
Expression* offset;
std::vector<Name> data;
Segment() {}
Segment(Expression* offset) : offset(offset) {
Segment(Expression* offset, std::vector<Name>& init) : offset(offset) {
// Currently the wasm object always 'has' one Table. It 'exists' if it has been defined or imported.
// The table can exist but be empty and have no defined initial or max size.
bool exists;
bool imported;
Name name;
Address initial, max;
std::vector<Segment> segments;
Table() : exists(false), imported(false), initial(0), max(kMaxSize) {
name = Name::fromInt(0);
class Memory {
static const Address::address_t kPageSize = 64 * 1024;
static const Address::address_t kMaxSize = ~Address::address_t(0) / kPageSize;
static const Address::address_t kPageMask = ~(kPageSize - 1);
struct Segment {
Expression* offset;
std::vector<char> data; // TODO: optimize
Segment() {}
Segment(Expression* offset, const char* init, Address size) : offset(offset) {
std::copy_n(init, size, data.begin());
Segment(Expression* offset, std::vector<char>& init) : offset(offset) {
Name name;
Address initial, max; // sizes are in pages
std::vector<Segment> segments;
// See comment in Table.
bool exists;
bool imported;
Memory() : initial(0), max(kMaxSize), exists(false), imported(false) {
name = Name::fromInt(0);
// A global variable: name, type, initializer expression and mutability flag.
class Global {
public:
  Name name;
  WasmType type;
  Expression* init;
  bool mutable_;
};
// "Opaque" data, not part of the core wasm spec, that is held in binaries.
// May be parsed/handled by utility code elsewhere, but not in wasm.h
class UserSection {
public:
  std::string name;        // section name
  std::vector<char> data;  // raw section payload
};
class Module {
// wasm contents (generally you shouldn't access these from outside, except maybe for iterating; use add*() and the get() functions)
std::vector<std::unique_ptr<FunctionType>> functionTypes;
std::vector<std::unique_ptr<Import>> imports;
std::vector<std::unique_ptr<Export>> exports;
std::vector<std::unique_ptr<Function>> functions;
std::vector<std::unique_ptr<Global>> globals;
Table table;
Memory memory;
Name start;
std::vector<UserSection> userSections;
std::vector<std::string> debugInfoFileNames;
MixedArena allocator;
// TODO: add a build option where Names are just indices, and then these methods are not needed
std::map<Name, FunctionType*> functionTypesMap;
std::map<Name, Import*> importsMap;
std::map<Name, Export*> exportsMap; // exports map is by the *exported* name, which is unique
std::map<Name, Function*> functionsMap;
std::map<Name, Global*> globalsMap;
Module() {};
FunctionType* getFunctionType(Name name);
Import* getImport(Name name);
Export* getExport(Name name);
Function* getFunction(Name name);
Global* getGlobal(Name name);
FunctionType* getFunctionTypeOrNull(Name name);
Import* getImportOrNull(Name name);
Export* getExportOrNull(Name name);
Function* getFunctionOrNull(Name name);
Global* getGlobalOrNull(Name name);
void addFunctionType(FunctionType* curr);
void addImport(Import* curr);
void addExport(Export* curr);
void addFunction(Function* curr);
void addGlobal(Global* curr);
void addStart(const Name& s);
void removeImport(Name name);
// TODO: remove* for other elements
void updateMaps();
} // namespace wasm
namespace std {

// Hash support for wasm::Address: hashes the raw `addr` value with the
// standard hash for address_t, so Address can key unordered containers.
template<> struct hash<wasm::Address> {
  size_t operator()(const wasm::Address a) const {
    return std::hash<wasm::Address::address_t>()(a.addr);
  }
};

} // namespace std
#endif // wasm_wasm_h