| /* |
| * Copyright 2015 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // |
| // .s to WebAssembly translator. |
| // |
| |
| #ifndef wasm_s2wasm_h |
| #define wasm_s2wasm_h |
| |
| #include "wasm.h" |
| #include "parsing.h" |
| #include "asm_v_wasm.h" |
| |
| namespace wasm { |
| |
| cashew::IString EMSCRIPTEN_ASM_CONST("emscripten_asm_const"); |
| |
| // |
| // S2WasmBuilder - parses a .s file into WebAssembly |
| // |
| |
| class S2WasmBuilder { |
| AllocatingModule& wasm; |
| MixedArena& allocator; |
| const char* s; |
| bool debug; |
| bool ignoreUnknownSymbols; |
| |
| public: |
| S2WasmBuilder(AllocatingModule& wasm, const char* input, bool debug, |
| size_t globalBase, bool ignoreUnknownSymbols) |
| : wasm(wasm), |
| allocator(wasm.allocator), |
| debug(debug), |
| ignoreUnknownSymbols(ignoreUnknownSymbols), |
| globalBase(globalBase), |
| nextStatic(globalBase) { |
| s = input; |
| scan(); |
| s = input; |
| prepare(); |
| process(); |
| fix(); |
| } |
| |
| private: |
| // state |
| |
| size_t globalBase, // where globals can start to be statically allocated, i.e., the data segment |
| nextStatic; // location of next static allocation |
| std::map<Name, int32_t> staticAddresses; // name => address |
| |
| struct Relocation { |
| uint32_t* data; |
| Name value; |
| int offset; |
| Relocation(uint32_t* data, Name value, int offset) : data(data), value(value), offset(offset) {} |
| }; |
| std::vector<Relocation> relocations; |
| |
| std::set<Name> implementedFunctions; |
| std::map<Name, Name> aliasedFunctions; |
| |
| std::map<size_t, size_t> addressSegments; // address => segment index |
| |
| std::map<Name, size_t> functionIndexes; |
| |
| // utilities |
| |
| void skipWhitespace() { |
| while (1) { |
| while (*s && isspace(*s)) s++; |
| if (*s != '#') break; |
| while (*s != '\n') s++; |
| } |
| } |
| |
| bool skipComma() { |
| skipWhitespace(); |
| if (*s != ',') return false; |
| s++; |
| skipWhitespace(); |
| return true; |
| } |
| |
// Prints the upcoming input, labelled with `why` (which must be a string
// literal, as it is concatenated with ":"), and aborts. Wrapped in
// do { } while (0) so that `abort_on(...);` behaves as a single statement in
// every context, including unbraced if/else arms.
#define abort_on(why) do { \
    dump(why ":");         \
    abort();               \
  } while (0)
| |
| // match and skip the pattern, if matched |
| bool match(const char *pattern) { |
| size_t size = strlen(pattern); |
| if (strncmp(s, pattern, size) == 0) { |
| s += size; |
| skipWhitespace(); |
| return true; |
| } |
| return false; |
| } |
| |
| void mustMatch(const char *pattern) { |
| bool matched = match(pattern); |
| if (!matched) { |
| std::cerr << "<< " << pattern << " >>\n"; |
| abort_on("bad mustMatch"); |
| } |
| } |
| |
| void dump(const char *text) { |
| std::cerr << "[[" << text << "]]:\n==========\n"; |
| for (size_t i = 0; i < 60; i++) { |
| if (!s[i]) break; |
| std::cerr << s[i]; |
| } |
| std::cerr << "\n==========\n"; |
| } |
| |
| void unget(Name str) { |
| s -= strlen(str.str); |
| } |
| |
| Name getStr() { |
| std::string str; // TODO: optimize this and the other get* methods |
| while (*s && !isspace(*s)) { |
| str += *s; |
| s++; |
| } |
| return cashew::IString(str.c_str(), false); |
| } |
| |
| void skipToSep() { |
| while (*s && !isspace(*s) && *s != ',' && *s != '(' && *s != ')' && *s != ':' && *s != '+' && *s != '-') { |
| s++; |
| } |
| } |
| |
| Name getStrToSep() { |
| std::string str; |
| while (*s && !isspace(*s) && *s != ',' && *s != '(' && *s != ')' && *s != ':' && *s != '+' && *s != '-' && *s != '=') { |
| str += *s; |
| s++; |
| } |
| return cashew::IString(str.c_str(), false); |
| } |
| |
| Name getStrToColon() { |
| std::string str; |
| while (*s && !isspace(*s) && *s != ':') { |
| str += *s; |
| s++; |
| } |
| return cashew::IString(str.c_str(), false); |
| } |
| |
| // get an int |
| int32_t getInt() { |
| const char* loc = s; |
| uint32_t value = 0; |
| bool neg = false; |
| if (*loc == '-') { |
| neg = true; |
| loc++; |
| } |
| while (isdigit(*loc)) { |
| uint32_t digit = *loc - '0'; |
| if (value > std::numeric_limits<uint32_t>::max() / 10) { |
| abort_on("uint32_t overflow"); |
| } |
| value *= 10; |
| if (value > std::numeric_limits<uint32_t>::max() - digit) { |
| abort_on("uint32_t overflow"); |
| } |
| value += digit; |
| loc++; |
| } |
| if (neg) { |
| uint32_t positive_int_min = |
| (uint32_t) - (1 + std::numeric_limits<int32_t>::min()) + (uint32_t)1; |
| if (value > positive_int_min) { |
| abort_on("negative int32_t overflow"); |
| } |
| s = loc; |
| return -value; |
| } |
| s = loc; |
| return value; |
| } |
| |
| // get an int from an arbitrary string, with our full error handling |
| int32_t getInt(const char *from) { |
| const char *before = s; |
| s = from; |
| auto ret = getInt(); |
| s = before; |
| return ret; |
| } |
| |
| // gets a constant, which may be a relocation for later. |
| // returns whether this is a relocation |
| bool getConst(uint32_t* target) { |
| if (isdigit(*s) || *s == '-') { |
| *target = getInt(); |
| return false; |
| } else { |
| // a global constant, we need to fix it up later |
| Name name = cleanFunction(getStrToSep()); |
| int offset = 0; |
| if (*s == '+') { |
| s++; |
| offset = getInt(); |
| } else if (*s == '-') { |
| s++; |
| offset = -getInt(); |
| } |
| relocations.emplace_back(target, name, offset); |
| return true; |
| } |
| } |
| |
| int64_t getInt64() { |
| const char* loc = s; |
| uint64_t value = 0; |
| bool neg = false; |
| if (*loc == '-') { |
| neg = true; |
| loc++; |
| } |
| while (isdigit(*loc)) { |
| uint64_t digit = *loc - '0'; |
| if (value > std::numeric_limits<uint64_t>::max() / 10) { |
| abort_on("uint64_t overflow"); |
| } |
| value *= 10; |
| if (value > std::numeric_limits<uint64_t>::max() - digit) { |
| abort_on("uint64_t overflow"); |
| } |
| value += digit; |
| loc++; |
| } |
| if (neg) { |
| uint64_t positive_int_min = |
| (uint64_t) - (1 + std::numeric_limits<int64_t>::min()) + (uint64_t)1; |
| if (value > positive_int_min) { |
| abort_on("negative int64_t overflow"); |
| } |
| s = loc; |
| return -value; |
| } |
| s = loc; |
| return value; |
| } |
| |
| Name getSeparated(char separator) { |
| skipWhitespace(); |
| std::string str; |
| while (*s && *s != separator && *s != '\n') { |
| str += *s; |
| s++; |
| } |
| skipWhitespace(); |
| return cashew::IString(str.c_str(), false); |
| } |
| Name getCommaSeparated() { return getSeparated(','); } |
| Name getAtSeparated() { return getSeparated('@'); } |
| |
| Name getAssign() { |
| skipWhitespace(); |
| if (*s != '$') return Name(); |
| std::string str; |
| const char *before = s; |
| while (*s && *s != '=' && *s != '\n' && *s != ',') { |
| str += *s; |
| s++; |
| } |
| if (*s != '=') { // not an assign |
| s = before; |
| return Name(); |
| } |
| s++; |
| skipComma(); |
| return cashew::IString(str.c_str(), false); |
| } |
| |
| std::vector<char> getQuoted() { |
| assert(*s == '"'); |
| s++; |
| std::vector<char> str; |
| while (*s && *s != '\"') { |
| if (s[0] == '\\') { |
| switch (s[1]) { |
| case 'n': str.push_back('\n'); s += 2; continue; |
| case 'r': str.push_back('\r'); s += 2; continue; |
| case 't': str.push_back('\t'); s += 2; continue; |
| case 'f': str.push_back('\f'); s += 2; continue; |
| case 'b': str.push_back('\b'); s += 2; continue; |
| case '\\': str.push_back('\\'); s += 2; continue; |
| case '"': str.push_back('"'); s += 2; continue; |
| default: { |
| if (isdigit(s[1])) { |
| int code = (s[1] - '0')*8*8 + (s[2] - '0')*8 + (s[3] - '0'); |
| str.push_back(char(code)); |
| s += 4; |
| continue; |
| } else abort_on("getQuoted-escape"); |
| } |
| } |
| } |
| str.push_back(*s); |
| s++; |
| } |
| s++; |
| skipWhitespace(); |
| return str; |
| } |
| |
| WasmType getType() { |
| if (match("i32")) return i32; |
| if (match("i64")) return i64; |
| if (match("f32")) return f32; |
| if (match("f64")) return f64; |
| abort_on("getType"); |
| } |
| |
| // The LLVM backend emits function names as name@FUNCTION. We can drop the @ and after it. |
| Name cleanFunction(Name name) { |
| if (!strchr(name.str, '@')) return name; |
| char *temp = strdup(name.str); |
| *strchr(temp, '@') = 0; |
| Name ret = cashew::IString(temp, false); |
| free(temp); |
| return ret; |
| } |
| |
| // processors |
| |
| void scan() { |
| while (*s) { |
| s = strstr(s, "\n .type "); |
| if (!s) break; |
| mustMatch("\n .type "); |
| Name name = getCommaSeparated(); |
| skipComma(); |
| if (!match("@function")) continue; |
| if (match(".hidden")) mustMatch(name.str); |
| mustMatch(name.str); |
| if (match(":")) { |
| implementedFunctions.insert(name); |
| } else if (match("=")) { |
| Name alias = getAtSeparated(); |
| mustMatch("@FUNCTION"); |
| aliasedFunctions.insert({name, alias}); |
| } else { |
| abort_on("unknown directive"); |
| } |
| } |
| } |
| |
| void prepare() { |
| assert(nextStatic == globalBase); // we are the first allocation |
| staticAddresses["__stack_pointer"] = nextStatic; |
| nextStatic += 4; |
| } |
| |
| void process() { |
| while (*s) { |
| skipWhitespace(); |
| if (debug) dump("process"); |
| if (!*s) break; |
| if (*s != '.') break; |
| s++; |
| if (match("text")) parseText(); |
| else if (match("type")) parseType(); |
| else if (match("weak") || match("hidden") || match("protected") || match("internal")) getStr(); // contents are in the content that follows |
| else if (match("imports")) skipImports(); |
| else if (match("data")) {} |
| else if (match("ident")) {} |
| else if (match("section") || match("align") || match("p2align")) s = strchr(s, '\n'); |
| else if (match("Lfunc_end")) { |
| // skip the next line, which has a .size we can ignore |
| s = strstr(s, ".size"); |
| s = strchr(s, '\n'); |
| } else if (match("globl")) parseGlobl(); |
| else abort_on("process"); |
| } |
| } |
| |
| void parseText() { |
| while (*s) { |
| skipWhitespace(); |
| if (!*s) break; |
| if (*s != '.') break; |
| s++; |
| if (parseVersionMin()); |
| else if (match("file")) parseFile(); |
| else if (match("globl")) parseGlobl(); |
| else if (match("type")) parseType(); |
| else { |
| s--; |
| break; |
| } |
| } |
| } |
| |
| void parseFile() { |
| assert(*s == '"'); |
| s++; |
| std::string filename; |
| while (*s != '"') { |
| filename += *s; |
| s++; |
| } |
| s++; |
| // TODO: use the filename? |
| } |
| |
| void parseGlobl() { |
| (void)getStr(); |
| skipWhitespace(); |
| } |
| |
| bool parseVersionMin() { |
| if (match("watchos_version_min") || match("tvos_version_min") || match("ios_version_min") || match("macosx_version_min")) { |
| s = strchr(s, '\n'); |
| skipWhitespace(); |
| return true; |
| } else |
| return false; |
| } |
| |
| void parseFunction() { |
| if (debug) dump("func"); |
| Name name = getStrToSep(); |
| if (match(" =")) { |
| /* alias = */ getAtSeparated(); |
| mustMatch("@FUNCTION"); |
| return; |
| } |
| |
| mustMatch(":"); |
| |
| unsigned nextId = 0; |
| auto getNextId = [&nextId]() { |
| return cashew::IString(('$' + std::to_string(nextId++)).c_str(), false); |
| }; |
| |
| auto func = allocator.alloc<Function>(); |
| func->name = name; |
| std::map<Name, WasmType> localTypes; |
| // params and result |
| while (1) { |
| if (match(".param")) { |
| while (1) { |
| Name name = getNextId(); |
| WasmType type = getType(); |
| func->params.emplace_back(name, type); |
| localTypes[name] = type; |
| skipWhitespace(); |
| if (!match(",")) break; |
| } |
| } else if (match(".result")) { |
| func->result = getType(); |
| } else if (match(".local")) { |
| while (1) { |
| Name name = getNextId(); |
| WasmType type = getType(); |
| func->locals.emplace_back(name, type); |
| localTypes[name] = type; |
| skipWhitespace(); |
| if (!match(",")) break; |
| } |
| } else break; |
| } |
| // parse body |
| func->body = allocator.alloc<Block>(); |
| std::vector<Expression*> bstack; |
| auto addToBlock = [&bstack](Expression* curr) { |
| Expression* last = bstack.back(); |
| if (last->is<Loop>()) { |
| last = last->cast<Loop>()->body; |
| } |
| last->cast<Block>()->list.push_back(curr); |
| }; |
| bstack.push_back(func->body); |
| std::vector<Expression*> estack; |
| auto push = [&](Expression* curr) { |
| //std::cerr << "push " << curr << '\n'; |
| estack.push_back(curr); |
| }; |
| auto pop = [&]() { |
| assert(!estack.empty()); |
| Expression* ret = estack.back(); |
| assert(ret); |
| estack.pop_back(); |
| //std::cerr << "pop " << ret << '\n'; |
| return ret; |
| }; |
| auto getNumInputs = [&]() { |
| int ret = 1; |
| const char *t = s; |
| while (*t != '\n') { |
| if (*t == ',') ret++; |
| t++; |
| } |
| return ret; |
| }; |
| auto getInputs = [&](int num) { |
| // we may have $pop, $0, $pop, $1 etc., which are getlocals |
| // interleaved with stack pops, and the stack pops must be done in |
| // *reverse* order, i.e., that input should turn into |
| // lastpop, getlocal(0), firstpop, getlocal(1) |
| std::vector<Expression*> inputs; // TODO: optimize (if .s format doesn't change) |
| inputs.resize(num); |
| for (int i = 0; i < num; i++) { |
| if (match("$pop")) { |
| skipToSep(); |
| inputs[i] = nullptr; |
| } else { |
| auto curr = allocator.alloc<GetLocal>(); |
| curr->name = getStrToSep(); |
| curr->type = localTypes[curr->name]; |
| inputs[i] = curr; |
| } |
| if (*s == ')') s++; // tolerate 0(argument) syntax, where we started at the 'a' |
| if (*s == ':') { // tolerate :attribute=value syntax (see getAttributes) |
| s++; |
| skipToSep(); |
| } |
| if (i < num - 1) skipComma(); |
| } |
| for (int i = num-1; i >= 0; i--) { |
| if (inputs[i] == nullptr) inputs[i] = pop(); |
| } |
| return inputs; |
| }; |
| auto getInput = [&]() { |
| return getInputs(1)[0]; |
| }; |
| auto setOutput = [&](Expression* curr, Name assign) { |
| if (assign.isNull() || assign.str[1] == 'd') { // discard |
| addToBlock(curr); |
| } else if (assign.str[1] == 'p') { // push |
| push(curr); |
| } else { // set to a local |
| auto set = allocator.alloc<SetLocal>(); |
| set->name = assign; |
| set->value = curr; |
| set->type = curr->type; |
| addToBlock(set); |
| } |
| }; |
| auto getAttributes = [&](int num) { |
| const char *before = s; |
| std::vector<const char*> attributes; // TODO: optimize (if .s format doesn't change) |
| attributes.resize(num); |
| for (int i = 0; i < num; i++) { |
| skipToSep(); |
| if (*s == ')') s++; // tolerate 0(argument) syntax, where we started at the 'a' |
| if (*s == ':') { |
| attributes[i] = s + 1; |
| } else { |
| attributes[i] = nullptr; |
| } |
| if (i < num - 1) skipComma(); |
| } |
| s = before; |
| return attributes; |
| }; |
| // |
| auto makeBinary = [&](BinaryOp op, WasmType type) { |
| Name assign = getAssign(); |
| skipComma(); |
| auto curr = allocator.alloc<Binary>(); |
| curr->op = op; |
| auto inputs = getInputs(2); |
| curr->left = inputs[0]; |
| curr->right = inputs[1]; |
| curr->finalize(); |
| assert(curr->type == type); |
| setOutput(curr, assign); |
| }; |
| auto makeUnary = [&](UnaryOp op, WasmType type) { |
| Name assign = getAssign(); |
| skipComma(); |
| auto curr = allocator.alloc<Unary>(); |
| curr->op = op; |
| curr->value = getInput(); |
| curr->type = type; |
| setOutput(curr, assign); |
| }; |
| auto makeHost = [&](HostOp op) { |
| Name assign = getAssign(); |
| auto curr = allocator.alloc<Host>(); |
| curr->op = MemorySize; |
| setOutput(curr, assign); |
| }; |
| auto makeHost1 = [&](HostOp op) { |
| Name assign = getAssign(); |
| auto curr = allocator.alloc<Host>(); |
| curr->op = MemorySize; |
| curr->operands.push_back(getInput()); |
| setOutput(curr, assign); |
| }; |
| auto makeLoad = [&](WasmType type) { |
| skipComma(); |
| auto curr = allocator.alloc<Load>(); |
| curr->type = type; |
| int32_t bytes = getInt()/8; |
| curr->bytes = bytes > 0 ? bytes : getWasmTypeSize(type); |
| curr->signed_ = match("_s"); |
| match("_u"); |
| Name assign = getAssign(); |
| getConst(&curr->offset); |
| mustMatch("("); |
| auto attributes = getAttributes(1); |
| curr->ptr = getInput(); |
| curr->align = curr->bytes; |
| if (attributes[0]) { |
| assert(strncmp(attributes[0], "p2align=", 8) == 0); |
| curr->align = pow(2, getInt(attributes[0] + 8)); |
| } |
| setOutput(curr, assign); |
| }; |
| auto makeStore = [&](WasmType type) { |
| skipComma(); |
| auto curr = allocator.alloc<Store>(); |
| curr->type = type; |
| int32_t bytes = getInt(); |
| curr->bytes = bytes > 0 ? bytes : getWasmTypeSize(type); |
| Name assign = getAssign(); |
| getConst(&curr->offset); |
| mustMatch("("); |
| auto attributes = getAttributes(2); |
| auto inputs = getInputs(2); |
| curr->ptr = inputs[0]; |
| curr->align = curr->bytes; |
| if (attributes[0]) { |
| assert(strncmp(attributes[0], "p2align=", 8) == 0); |
| curr->align = pow(2, getInt(attributes[0] + 8)); |
| } |
| curr->value = inputs[1]; |
| setOutput(curr, assign); |
| }; |
| auto makeSelect = [&](WasmType type) { |
| Name assign = getAssign(); |
| skipComma(); |
| auto curr = allocator.alloc<Select>(); |
| auto inputs = getInputs(3); |
| curr->condition = inputs[0]; |
| curr->ifTrue = inputs[1]; |
| curr->ifFalse = inputs[2]; |
| curr->type = type; |
| setOutput(curr, assign); |
| }; |
| auto makeCall = [&](WasmType type) { |
| if (match("_indirect")) { |
| // indirect call |
| auto indirect = allocator.alloc<CallIndirect>(); |
| Name assign = getAssign(); |
| int num = getNumInputs(); |
| auto inputs = getInputs(num); |
| indirect->target = inputs[0]; |
| indirect->type = type; |
| for (int i = 1; i < num; i++) { |
| indirect->operands.push_back(inputs[i]); |
| } |
| setOutput(indirect, assign); |
| auto typeName = cashew::IString((std::string("FUNCSIG_") + getSig(indirect)).c_str(), false); |
| if (wasm.functionTypesMap.count(typeName) == 0) { |
| auto type = allocator.alloc<FunctionType>(); |
| *type = sigToFunctionType(getSig(indirect)); |
| type->name = typeName; |
| wasm.addFunctionType(type); |
| indirect->fullType = type; |
| } else { |
| indirect->fullType = wasm.functionTypesMap[typeName]; |
| } |
| } else { |
| // non-indirect call |
| CallBase* curr; |
| Name assign = getAssign(); |
| Name target = cleanFunction(getCommaSeparated()); |
| auto aliased = aliasedFunctions.find(target); |
| if (aliased != aliasedFunctions.end()) target = aliased->second; |
| if (implementedFunctions.count(target) > 0) { |
| auto specific = allocator.alloc<Call>(); |
| specific->target = target; |
| curr = specific; |
| } else { |
| auto specific = allocator.alloc<CallImport>(); |
| specific->target = target; |
| curr = specific; |
| } |
| curr->type = type; |
| skipWhitespace(); |
| if (*s == ',') { |
| skipComma(); |
| int num = getNumInputs(); |
| auto inputs = getInputs(num); |
| for (int i = 0; i < num; i++) { |
| curr->operands.push_back(inputs[i]); |
| } |
| } |
| setOutput(curr, assign); |
| if (curr->is<CallImport>()) { |
| auto target = curr->cast<CallImport>()->target; |
| if (wasm.importsMap.count(target) == 0) { |
| auto import = allocator.alloc<Import>(); |
| import->name = import->base = target; |
| import->module = ENV; |
| import->type = ensureFunctionType(getSig(curr), &wasm, allocator); |
| wasm.addImport(import); |
| } |
| } |
| } |
| }; |
| auto handleTyped = [&](WasmType type) { |
| switch (*s) { |
| case 'a': { |
| if (match("add")) makeBinary(BinaryOp::Add, type); |
| else if (match("and")) makeBinary(BinaryOp::And, type); |
| else if (match("abs")) makeUnary(UnaryOp::Abs, type); |
| else abort_on("type.a"); |
| break; |
| } |
| case 'c': { |
| if (match("const")) { |
| Name assign = getAssign(); |
| if (type == i32) { |
| // may be a relocation |
| auto curr = allocator.alloc<Const>(); |
| curr->type = curr->value.type = i32; |
| getConst((uint32_t*)&curr->value.i32); |
| setOutput(curr, assign); |
| } else { |
| cashew::IString str = getStr(); |
| setOutput(parseConst(str, type, allocator), assign); |
| } |
| } |
| else if (match("call")) makeCall(type); |
| else if (match("convert_s/i32")) makeUnary(UnaryOp::ConvertSInt32, type); |
| else if (match("convert_u/i32")) makeUnary(UnaryOp::ConvertUInt32, type); |
| else if (match("convert_s/i64")) makeUnary(UnaryOp::ConvertSInt64, type); |
| else if (match("convert_u/i64")) makeUnary(UnaryOp::ConvertUInt64, type); |
| else if (match("clz")) makeUnary(UnaryOp::Clz, type); |
| else if (match("ctz")) makeUnary(UnaryOp::Ctz, type); |
| else if (match("copysign")) makeBinary(BinaryOp::CopySign, type); |
| else if (match("ceil")) makeUnary(UnaryOp::Ceil, type); |
| else abort_on("type.c"); |
| break; |
| } |
| case 'd': { |
| if (match("demote/f64")) makeUnary(UnaryOp::DemoteFloat64, type); |
| else if (match("div_s")) makeBinary(BinaryOp::DivS, type); |
| else if (match("div_u")) makeBinary(BinaryOp::DivU, type); |
| else if (match("div")) makeBinary(BinaryOp::Div, type); |
| else abort_on("type.g"); |
| break; |
| } |
| case 'e': { |
| if (match("eq")) makeBinary(BinaryOp::Eq, i32); |
| else if (match("extend_s/i32")) makeUnary(UnaryOp::ExtendSInt32, type); |
| else if (match("extend_u/i32")) makeUnary(UnaryOp::ExtendUInt32, type); |
| else abort_on("type.e"); |
| break; |
| } |
| case 'f': { |
| if (match("floor")) makeUnary(UnaryOp::Floor, type); |
| else abort_on("type.e"); |
| break; |
| } |
| case 'g': { |
| if (match("gt_s")) makeBinary(BinaryOp::GtS, i32); |
| else if (match("gt_u")) makeBinary(BinaryOp::GtU, i32); |
| else if (match("ge_s")) makeBinary(BinaryOp::GeS, i32); |
| else if (match("ge_u")) makeBinary(BinaryOp::GeU, i32); |
| else if (match("gt")) makeBinary(BinaryOp::Gt, i32); |
| else if (match("ge")) makeBinary(BinaryOp::Ge, i32); |
| else abort_on("type.g"); |
| break; |
| } |
| case 'l': { |
| if (match("lt_s")) makeBinary(BinaryOp::LtS, i32); |
| else if (match("lt_u")) makeBinary(BinaryOp::LtU, i32); |
| else if (match("le_s")) makeBinary(BinaryOp::LeS, i32); |
| else if (match("le_u")) makeBinary(BinaryOp::LeU, i32); |
| else if (match("load")) makeLoad(type); |
| else if (match("lt")) makeBinary(BinaryOp::Lt, i32); |
| else if (match("le")) makeBinary(BinaryOp::Le, i32); |
| else abort_on("type.g"); |
| break; |
| } |
| case 'm': { |
| if (match("mul")) makeBinary(BinaryOp::Mul, type); |
| else if (match("min")) makeBinary(BinaryOp::Min, type); |
| else if (match("max")) makeBinary(BinaryOp::Max, type); |
| else abort_on("type.m"); |
| break; |
| } |
| case 'n': { |
| if (match("neg")) makeUnary(UnaryOp::Neg, type); |
| else if (match("nearest")) makeUnary(UnaryOp::Nearest, type); |
| else if (match("ne")) makeBinary(BinaryOp::Ne, i32); |
| else abort_on("type.n"); |
| break; |
| } |
| case 'o': { |
| if (match("or")) makeBinary(BinaryOp::Or, type); |
| else abort_on("type.o"); |
| break; |
| } |
| case 'p': { |
| if (match("promote/f32")) makeUnary(UnaryOp::PromoteFloat32, type); |
| else if (match("popcnt")) makeUnary(UnaryOp::Popcnt, type); |
| else abort_on("type.p"); |
| break; |
| } |
| case 'r': { |
| if (match("rem_s")) makeBinary(BinaryOp::RemS, type); |
| else if (match("rem_u")) makeBinary(BinaryOp::RemU, type); |
| else if (match("reinterpret/i32") || match("reinterpret/i64")) makeUnary(UnaryOp::ReinterpretInt, type); |
| else if (match("reinterpret/f32") || match("reinterpret/f64")) makeUnary(UnaryOp::ReinterpretFloat, type); |
| else abort_on("type.r"); |
| break; |
| } |
| case 's': { |
| if (match("shr_s")) makeBinary(BinaryOp::ShrS, type); |
| else if (match("shr_u")) makeBinary(BinaryOp::ShrU, type); |
| else if (match("shl")) makeBinary(BinaryOp::Shl, type); |
| else if (match("sub")) makeBinary(BinaryOp::Sub, type); |
| else if (match("store")) makeStore(type); |
| else if (match("select")) makeSelect(type); |
| else if (match("sqrt")) makeUnary(UnaryOp::Sqrt, type); |
| else abort_on("type.s"); |
| break; |
| } |
| case 't': { |
| if (match("trunc_s/f32")) makeUnary(UnaryOp::TruncSFloat32, type); |
| else if (match("trunc_u/f32")) makeUnary(UnaryOp::TruncUFloat32, type); |
| else if (match("trunc_s/f64")) makeUnary(UnaryOp::TruncSFloat64, type); |
| else if (match("trunc_u/f64")) makeUnary(UnaryOp::TruncUFloat64, type); |
| else if (match("trunc")) makeUnary(UnaryOp::Trunc, type); |
| else abort_on("type.t"); |
| break; |
| } |
| case 'w': { |
| if (match("wrap/i64")) makeUnary(UnaryOp::WrapInt64, type); |
| else abort_on("type.w"); |
| break; |
| } |
| case 'x': { |
| if (match("xor")) makeBinary(BinaryOp::Xor, type); |
| else abort_on("type.x"); |
| break; |
| } |
| default: abort_on("type.?"); |
| } |
| }; |
| // labels |
| size_t nextLabel = 0; |
| auto getNextLabel = [&nextLabel]() { |
| return cashew::IString(("label$" + std::to_string(nextLabel++)).c_str(), false); |
| }; |
| auto getBranchLabel = [&](uint32_t offset) { |
| assert(offset < bstack.size()); |
| Expression* target = bstack[bstack.size() - 1 - offset]; |
| if (target->is<Block>()) { |
| return target->cast<Block>()->name; |
| } else { |
| return target->cast<Loop>()->in; |
| } |
| }; |
| // fixups |
| std::vector<Block*> loopBlocks; // we need to clear their names |
| // main loop |
| while (1) { |
| skipWhitespace(); |
| if (debug) dump("main function loop"); |
| if (match("i32.")) { |
| handleTyped(i32); |
| } else if (match("i64.")) { |
| handleTyped(i64); |
| } else if (match("f32.")) { |
| handleTyped(f32); |
| } else if (match("f64.")) { |
| handleTyped(f64); |
| } else if (match("block")) { |
| auto curr = allocator.alloc<Block>(); |
| curr->name = getNextLabel(); |
| addToBlock(curr); |
| bstack.push_back(curr); |
| } else if (match("end_block")) { |
| bstack.pop_back(); |
| } else if (match(".LBB")) { |
| s = strchr(s, '\n'); |
| } else if (match("loop")) { |
| auto curr = allocator.alloc<Loop>(); |
| addToBlock(curr); |
| curr->in = getNextLabel(); |
| curr->out = getNextLabel(); |
| auto block = allocator.alloc<Block>(); |
| block->name = curr->out; // temporary, fake - this way, on bstack we have the right label at the right offset for a br |
| curr->body = block; |
| loopBlocks.push_back(block); |
| bstack.push_back(block); |
| bstack.push_back(curr); |
| } else if (match("end_loop")) { |
| bstack.pop_back(); |
| bstack.pop_back(); |
| } else if (match("br")) { |
| auto curr = allocator.alloc<Break>(); |
| if (*s == '_') { |
| mustMatch("_if"); |
| curr->condition = getInput(); |
| skipComma(); |
| } |
| curr->name = getBranchLabel(getInt()); |
| addToBlock(curr); |
| } else if (match("call")) { |
| makeCall(none); |
| } else if (match("copy_local")) { |
| Name assign = getAssign(); |
| skipComma(); |
| setOutput(getInput(), assign); |
| } else if (match("tee_local")) { |
| Name assign = getAssign(); |
| skipComma(); |
| auto curr = allocator.alloc<SetLocal>(); |
| curr->name = getAssign(); |
| skipComma(); |
| curr->value = getInput(); |
| curr->type = curr->value->type; |
| setOutput(curr, assign); |
| } else if (match("return")) { |
| auto curr = allocator.alloc<Return>(); |
| if (*s == '$') { |
| curr->value = getInput(); |
| } |
| addToBlock(curr); |
| } else if (match("tableswitch")) { |
| auto curr = allocator.alloc<Switch>(); |
| curr->value = getInput(); |
| skipComma(); |
| curr->default_ = getBranchLabel(getInt()); |
| while (skipComma()) { |
| curr->targets.push_back(getBranchLabel(getInt())); |
| } |
| addToBlock(curr); |
| } else if (match("unreachable")) { |
| addToBlock(allocator.alloc<Unreachable>()); |
| } else if (match("memory_size")) { |
| makeHost(MemorySize); |
| } else if (match("grow_memory")) { |
| makeHost1(GrowMemory); |
| } else if (match(".Lfunc_end")) { |
| s = strchr(s, '\n'); |
| s++; |
| s = strchr(s, '\n'); |
| break; // the function is done |
| } else if (match(".endfunc")) { |
| break; // the function is done |
| } else { |
| abort_on("function element"); |
| } |
| } |
| // finishing touches |
| bstack.pop_back(); // remove the base block for the function body |
| assert(bstack.empty()); |
| assert(estack.empty()); |
| for (auto block : loopBlocks) { |
| block->name = Name(); |
| } |
| wasm.addFunction(func); |
| // XXX for now, export all functions |
| auto exp = allocator.alloc<Export>(); |
| exp->name = exp->value = func->name; |
| wasm.addExport(exp); |
| } |
| |
| void parseType() { |
| if (debug) dump("type"); |
| Name name = getStrToSep(); |
| skipComma(); |
| if (match("@function")) { |
| if (match(".hidden")) mustMatch(name.str); |
| return parseFunction(); |
| } else if (match("@object")) { |
| return parseObject(name); |
| } |
| abort_on("parseType"); |
| } |
| |
| void parseObject(Name name) { |
| if (debug) std::cerr << "parseObject " << name << '\n'; |
| if (match(".data") || match(".bss")) { |
| } else if (match(".section")) { |
| s = strchr(s, '\n'); |
| } else if (match(".lcomm")) { |
| parseLcomm(name); |
| return; |
| } |
| skipWhitespace(); |
| size_t align = 4; // XXX default? |
| if (match(".globl")) { |
| mustMatch(name.str); |
| skipWhitespace(); |
| } |
| if (match(".align") || match(".p2align")) { |
| align = getInt(); |
| skipWhitespace(); |
| } |
| align = pow(2, align); // convert from power to actual bytes |
| if (match(".lcomm")) { |
| parseLcomm(name, align); |
| return; |
| } |
| mustMatch(name.str); |
| mustMatch(":"); |
| auto raw = new std::vector<char>(); // leaked intentionally, no new allocation in Memory |
| bool zero = true; |
| std::vector<std::pair<size_t, size_t>> currRelocations; // [index in relocations, offset in raw] |
| while (1) { |
| skipWhitespace(); |
| if (match(".asci")) { |
| bool z; |
| if (match("i")) { |
| z = false; |
| } else { |
| mustMatch("z"); |
| z = true; |
| } |
| auto quoted = getQuoted(); |
| raw->insert(raw->end(), quoted.begin(), quoted.end()); |
| if (z) raw->push_back(0); |
| zero = false; |
| } else if (match(".zero") || match(".skip")) { |
| int32_t size = getInt(); |
| if (size <= 0) { |
| abort_on(".zero with zero or negative size"); |
| } |
| unsigned char value = 0; |
| if (skipComma()) { |
| value = getInt(); |
| if (value != 0) zero = false; |
| } |
| for (size_t i = 0, e = size; i < e; i++) { |
| raw->push_back(value); |
| } |
| } else if (match(".int8")) { |
| size_t size = raw->size(); |
| raw->resize(size + 1); |
| (*(int8_t*)(&(*raw)[size])) = getInt(); |
| zero = false; |
| } else if (match(".int16")) { |
| size_t size = raw->size(); |
| raw->resize(size + 2); |
| (*(int16_t*)(&(*raw)[size])) = getInt(); |
| zero = false; |
| } else if (match(".int32")) { |
| size_t size = raw->size(); |
| raw->resize(size + 4); |
| if (getConst((uint32_t*)&(*raw)[size])) { // just the size, as we may reallocate; we must fix this later, if it's a relocation |
| currRelocations.emplace_back(relocations.size()-1, size); |
| } |
| zero = false; |
| } else if (match(".int64")) { |
| size_t size = raw->size(); |
| raw->resize(size + 8); |
| (*(int64_t*)(&(*raw)[size])) = getInt64(); |
| zero = false; |
| } else { |
| break; |
| } |
| } |
| skipWhitespace(); |
| size_t size = raw->size(); |
| if (match(".size")) { |
| mustMatch(name.str); |
| mustMatch(","); |
| size_t seenSize = atoi(getStr().str); // TODO: optimize |
| assert(seenSize >= size); |
| while (raw->size() < seenSize) { |
| raw->push_back(0); |
| } |
| size = seenSize; |
| } |
| // raw is now finalized, prepare relocations |
| for (auto& curr : currRelocations) { |
| auto r = curr.first; |
| auto i = curr.second; |
| relocations[r].data = (uint32_t*)&(*raw)[i]; |
| } |
| // assign the address, add to memory |
| while (nextStatic % align) nextStatic++; |
| staticAddresses[name] = nextStatic; |
| if (!zero) { |
| addressSegments[nextStatic] = wasm.memory.segments.size(); |
| wasm.memory.segments.emplace_back(nextStatic, (const char*)&(*raw)[0], size); |
| } |
| nextStatic += size; |
| wasm.memory.initial = nextStatic; |
| } |
| |
| void parseLcomm(Name name, size_t align=1) { |
| mustMatch(name.str); |
| skipComma(); |
| size_t size = getInt(); |
| if (*s == ',') { |
| skipComma(); |
| getInt(); |
| } |
| while (nextStatic % align) nextStatic++; |
| staticAddresses[name] = nextStatic; |
| nextStatic += size; |
| wasm.memory.initial = nextStatic; |
| } |
| |
| void skipImports() { |
| while (1) { |
| if (match(".import")) { |
| s = strchr(s, '\n'); |
| skipWhitespace(); |
| continue; |
| } |
| break; |
| } |
| } |
| |
| void fix() { |
| auto ensureFunctionIndex = [&](Name name) { |
| if (functionIndexes.count(name) == 0) { |
| functionIndexes[name] = wasm.table.names.size(); |
| wasm.table.names.push_back(name); |
| if (debug) std::cerr << "function index: " << name << ": " << functionIndexes[name] << '\n'; |
| } |
| }; |
| for (auto& relocation : relocations) { |
| Name name = relocation.value; |
| if (debug) std::cerr << "fix relocation " << name << '\n'; |
| const auto &symbolAddress = staticAddresses.find(name); |
| if (symbolAddress != staticAddresses.end()) { |
| *(relocation.data) = symbolAddress->second + relocation.offset; |
| if (debug) std::cerr << " ==> " << *(relocation.data) << '\n'; |
| } else { |
| // must be a function address |
| if (wasm.functionsMap.count(name) == 0) { |
| std::cerr << "Unknown symbol: " << name << '\n'; |
| if (!ignoreUnknownSymbols) abort(); |
| *(relocation.data) = 0; |
| } else { |
| ensureFunctionIndex(name); |
| *(relocation.data) = functionIndexes[name] + relocation.offset; |
| } |
| } |
| } |
| } |
| |
// Writes the elements of `c` to `o` as a bracketed, comma-separated list in
// which every element is wrapped in double quotes, e.g. ["a","b"].
// An empty container produces "[]". Elements are streamed with operator<<
// as-is; no escaping is applied.
template<class C>
void printSet(std::ostream& o, C& c) {
  o << "[";
  bool needSeparator = false;
  for (auto& item : c) {
    if (needSeparator) o << ",";
    needSeparator = true;
    o << '"' << item << '"';
  }
  o << "]";
}
| |
| public: |
| |
| // extra emscripten processing |
| void emscriptenGlue(std::ostream& o) { |
| if (debug) { |
| std::cerr << wasm << '\n'; |
| } |
| |
| wasm.removeImport(EMSCRIPTEN_ASM_CONST); // we create _sig versions |
| |
| o << ";; METADATA: { "; |
| // find asmConst calls, and emit their metadata |
| struct AsmConstWalker : public WasmWalker<AsmConstWalker> { |
| S2WasmBuilder* parent; |
| |
| std::map<std::string, std::set<std::string>> sigsForCode; |
| std::map<std::string, size_t> ids; |
| std::set<std::string> allSigs; |
| |
| AsmConstWalker(S2WasmBuilder* parent) : parent(parent) {} |
| |
| void visitCallImport(CallImport* curr) { |
| if (curr->target == EMSCRIPTEN_ASM_CONST) { |
| auto arg = curr->operands[0]->cast<Const>(); |
| size_t segmentIndex = parent->addressSegments[arg->value.geti32()]; |
| std::string code = escape(parent->wasm.memory.segments[segmentIndex].data); |
| int32_t id; |
| if (ids.count(code) == 0) { |
| id = ids.size(); |
| ids[code] = id; |
| } else { |
| id = ids[code]; |
| } |
| std::string sig = getSig(curr); |
| sigsForCode[code].insert(sig); |
| std::string fixedTarget = std::string("_") + EMSCRIPTEN_ASM_CONST.str + '_' + sig; |
| curr->target = cashew::IString(fixedTarget.c_str(), false); |
| arg->value = Literal(id); |
| // add import, if necessary |
| if (allSigs.count(sig) == 0) { |
| allSigs.insert(sig); |
| auto import = parent->allocator.alloc<Import>(); |
| import->name = import->base = curr->target; |
| import->module = ENV; |
| import->type = ensureFunctionType(getSig(curr), &parent->wasm, parent->allocator); |
| parent->wasm.addImport(import); |
| } |
| } |
| } |
| |
| std::string escape(const char *input) { |
| std::string code = input; |
| // replace newlines quotes with escaped newlines |
| size_t curr = 0; |
| while ((curr = code.find("\\n", curr)) != std::string::npos) { |
| code = code.replace(curr, 2, "\\\\n"); |
| curr += 3; // skip this one |
| } |
| // replace double quotes with escaped single quotes |
| curr = 0; |
| while ((curr = code.find('"', curr)) != std::string::npos) { |
| if (curr == 0 || code[curr-1] != '\\') { |
| code = code.replace(curr, 1, "\\" "\""); |
| curr += 2; // skip this one |
| } else { // already escaped, escape the slash as well |
| code = code.replace(curr, 1, "\\" "\\" "\""); |
| curr += 3; // skip this one |
| } |
| } |
| return code; |
| } |
| }; |
| AsmConstWalker walker(this); |
| walker.startWalk(&wasm); |
| // print |
| o << "\"asmConsts\": {"; |
| bool first = true; |
| for (auto& pair : walker.sigsForCode) { |
| auto& code = pair.first; |
| auto& sigs = pair.second; |
| if (first) first = false; |
| else o << ","; |
| o << '"' << walker.ids[code] << "\": [\"" << code << "\", "; |
| printSet(o, sigs); |
| o << "]"; |
| } |
| o << "}"; |
| o << ","; |
| o << "\"staticBump\": " << (nextStatic - globalBase); |
| |
| o << " }"; |
| } |
| }; |
| |
| } // namespace wasm |
| |
| #endif // wasm_s2wasm_h |