| # Copyright 2011 The Emscripten Authors. All rights reserved. |
| # Emscripten is available under two separate licenses, the MIT license and the |
| # University of Illinois/NCSA Open Source License. Both these licenses can be |
| # found in the LICENSE file. |
| |
| """Utilities for manipulating WebAssembly binaries from python. |
| """ |
| |
| import logging |
| |
| from . import shared |
| |
logger = logging.getLogger('shared')

# Version of the Emscripten-specific wasm metadata section. It follows semver
# and is bumped whenever the structure of the metadata section changes.
# NB: a major version of 0 implies no compatibility guarantees.
# NB: when changing the metadata format, only append new fields; never
#     reorder, modify, or remove existing ones.
EMSCRIPTEN_METADATA_MAJOR = 0
EMSCRIPTEN_METADATA_MINOR = 3

# Version of the JS/wasm ABI: the minimum ABI version that the generated wasm
# binary requires of the wasm runtime implementation. It follows semver and
# changes whenever C types change size/signedness or syscalls change
# signature. By semver, the maximum ABI version is implied to be less than
# (EMSCRIPTEN_ABI_MAJOR + 1, 0).
# On an ABI change:
#   - increment EMSCRIPTEN_ABI_MINOR if EMSCRIPTEN_ABI_MAJOR == 0 or the
#     change is backwards compatible;
#   - otherwise increment EMSCRIPTEN_ABI_MAJOR and reset
#     EMSCRIPTEN_ABI_MINOR to 0.
EMSCRIPTEN_ABI_MAJOR = 0
EMSCRIPTEN_ABI_MINOR = 29

# Size in bytes of a single wasm memory page (64 KiB).
WASM_PAGE_SIZE = 64 * 1024
| |
| |
def toLEB(num):
    """Encode a non-negative integer as an unsigned LEB128 byte sequence.

    The value is emitted 7 bits at a time, least-significant group first;
    every byte except the last has its high bit (0x80) set.

    Args:
      num: the integer to encode; must be >= 0 (signed encoding is not
        implemented).

    Returns:
      A bytearray holding the encoded value (always at least one byte).
    """
    assert num >= 0, 'TODO: signed'
    encoded = bytearray()
    low_bits = num & 0x7f
    num >>= 7
    # While higher-order bits remain, emit the current group with the
    # continuation flag set and move on to the next group.
    while num:
        encoded.append(low_bits | 0x80)
        low_bits = num & 0x7f
        num >>= 7
    # The final group is written without the continuation flag.
    encoded.append(low_bits)
    return encoded
| |
| |
def readLEB(buf, offset):
    """Decode an unsigned LEB128 integer from `buf`, starting at `offset`.

    Args:
      buf: an indexable sequence whose items are integer byte values
        (e.g. bytes or bytearray).
      offset: index of the first byte of the encoded value.

    Returns:
      A `(value, next_offset)` tuple, where `next_offset` is the index just
      past the last byte that was consumed.
    """
    value = 0
    bit_pos = 0
    has_more = True
    while has_more:
        current = buf[offset]
        offset += 1
        # The low 7 bits are payload; the high bit says whether another
        # byte follows.
        value |= (current & 0x7f) << bit_pos
        bit_pos += 7
        has_more = bool(current & 0x80)
    return (value, offset)
| |
| |
def add_emscripten_metadata(wasm_file):
    """Insert an "emscripten_metadata" custom section into a wasm binary.

    The file at `wasm_file` is rewritten in place: the first 8 bytes (magic
    number and version) are kept, the new custom section is written right
    after them, and the remainder of the original file follows unchanged.

    The section payload is a sequence of unsigned LEB128 values built from
    the metadata/ABI version constants and from `shared.Settings`
    (INITIAL_MEMORY, GLOBAL_BASE, STANDALONE_WASM).

    Args:
      wasm_file: path of the wasm binary to modify.
    """
    mem_size = shared.Settings.INITIAL_MEMORY // WASM_PAGE_SIZE
    global_base = shared.Settings.GLOBAL_BASE

    logger.debug('creating wasm emscripten metadata section with mem size %d', mem_size)
    name = b'\x13emscripten_metadata'  # section name, including prefixed size
    contents = (
        # metadata section version
        toLEB(EMSCRIPTEN_METADATA_MAJOR) +
        toLEB(EMSCRIPTEN_METADATA_MINOR) +

        # NB: The structure of the following should only be changed
        #     if EMSCRIPTEN_METADATA_MAJOR is incremented
        # Minimum ABI version
        toLEB(EMSCRIPTEN_ABI_MAJOR) +
        toLEB(EMSCRIPTEN_ABI_MINOR) +

        # Wasm backend, always 1 now
        toLEB(1) +

        # initial memory size, in wasm pages
        toLEB(mem_size) +
        # NOTE(review): this zero and the one after global_base are unlabeled;
        # presumably legacy fields kept so the layout does not change - confirm.
        toLEB(0) +
        toLEB(global_base) +
        toLEB(0) +
        # dynamictopPtr, always 0 now
        toLEB(0) +

        # tempDoublePtr, always 0 in wasm backend
        toLEB(0) +

        toLEB(int(shared.Settings.STANDALONE_WASM))

        # NB: more data can be appended here as long as you increase
        #     the EMSCRIPTEN_METADATA_MINOR
    )

    # Read the whole original binary up front (and close the handle) before
    # reopening the same path for writing.
    with open(wasm_file, 'rb') as f:
        orig = f.read()

    # The section size covers the length-prefixed name plus the payload.
    size = len(name) + len(contents)
    with open(wasm_file, 'wb') as f:
        f.write(orig[0:8])  # copy magic number and version
        # write the special section
        f.write(b'\0')  # user section is code 0
        f.write(toLEB(size))
        f.write(name)
        f.write(contents)
        # copy the rest of the original binary
        f.write(orig[8:])
| |
| |
class Module:
    """A very small reader for wasm binaries.

    It loads the whole file into memory, checks the 8-byte header and then
    offers cursor-style helpers to read bytes, LEB128 integers and
    length-prefixed UTF-8 strings. At present it is only used to parse the
    dylink section.
    """

    def __init__(self, filename):
        """Load `filename` and position the cursor just past the header."""
        with open(filename, 'rb') as wasm:
            data = wasm.read()
        self.buf = data
        magic, version = data[:4], data[4:8]
        assert magic == b'\0asm'
        assert version == b'\x01\0\0\0'
        # Reading starts right after the magic number and version.
        self.offset = 8

    def readByte(self):
        """Return the byte at the cursor and advance the cursor by one."""
        pos = self.offset
        value = self.buf[pos]
        self.offset = pos + 1
        return value

    def readLEB(self):
        """Decode an unsigned LEB128 integer at the cursor and advance past it."""
        # Delegates to the module-level readLEB() helper.
        value, next_offset = readLEB(self.buf, self.offset)
        self.offset = next_offset
        return value

    def readString(self):
        """Read a LEB128 length followed by that many bytes of UTF-8 text."""
        length = self.readLEB()
        start = self.offset
        self.offset = start + length
        return self.buf[start:self.offset].decode('utf-8')
| |
| |
def parse_dylink_section(wasm_file):
    """Parse the "dylink" custom section at the start of a wasm binary.

    The section is expected to be the first one in the file, directly after
    the header; an assertion fails if the first section is not a custom
    section (id 0) named 'dylink'.

    Args:
      wasm_file: path of the wasm binary to read.

    Returns:
      A tuple `(mem_size, mem_align, table_size, table_align, section_end,
      needed)`, where `section_end` is the file offset just past the section
      and `needed` is the list of needed dynamic library names.
    """
    module = Module(wasm_file)

    # Section header: type byte followed by the LEB-encoded payload size.
    section_type = module.readByte()
    section_size = module.readLEB()
    assert section_type == 0
    # The payload starts at the current cursor, right after the size field.
    section_end = module.offset + section_size

    # The payload begins with the section name.
    section_name = module.readString()
    assert section_name == 'dylink'

    # Four LEB values follow, in this fixed order.
    mem_size, mem_align, table_size, table_align = (
        module.readLEB() for _ in range(4)
    )

    # Then a count and that many length-prefixed library names.
    needed = [module.readString() for _ in range(module.readLEB())]

    return (mem_size, mem_align, table_size, table_align, section_end, needed)
| |
| |
def update_dylink_section(wasm_file, extra_dynlibs):
    """Rewrite the "dylink" section of a wasm shared library in place.

    A wasm shared library has a special "dylink" section, see the
    tool-conventions repo. This function updates that section, adding extra
    dynamic library dependencies to the ones it already lists.

    Args:
      wasm_file: path of the wasm shared library to modify.
      extra_dynlibs: iterable of library names (str) to append to the
        section's list of needed libraries.
    """
    mem_size, mem_align, table_size, table_align, section_end, needed = parse_dylink_section(wasm_file)

    section_name = b'\06dylink'  # section name, including prefixed size
    # NOTE(review): the parsed table_align is not written back; 0 is always
    # emitted in its place. Confirm whether that is intentional.
    contents = (toLEB(mem_size) + toLEB(mem_align) +
                toLEB(table_size) + toLEB(0))

    # we extend "dylink" section with information about which shared libraries
    # our shared library needs. This is similar to DT_NEEDED entries in ELF.
    #
    # In theory we could avoid doing this, since every import in wasm has
    # "module" and "name" attributes, but currently emscripten almost always
    # uses just "env" for "module". This way we have to embed information about
    # required libraries for the dynamic linker somewhere, and "dylink" section
    # seems to be the most relevant place.
    #
    # Binary format of the extension:
    #
    #   needed_dynlibs_count        varuint32       ; number of needed shared libraries
    #   needed_dynlibs_entries      dynlib_entry*   ; repeated dynamic library entries as described below
    #
    # dynlib_entry:
    #
    #   dynlib_name_len             varuint32       ; length of dynlib_name_str in bytes
    #   dynlib_name_str             bytes           ; name of a needed dynamic library: valid UTF-8 byte sequence
    #
    # a proposal has been filed to include the extension into "dylink" specification:
    # https://github.com/WebAssembly/tool-conventions/pull/77
    needed += extra_dynlibs
    contents += toLEB(len(needed))
    for dyn_needed in needed:
        dyn_needed = dyn_needed.encode('utf-8')
        contents += toLEB(len(dyn_needed))
        contents += dyn_needed

    # Read the whole original binary up front (and close the handle) before
    # reopening the same path for writing.
    with open(wasm_file, 'rb') as f:
        orig = f.read()
    file_header = orig[:8]
    # Everything after the old dylink section is carried over unchanged.
    file_remainder = orig[section_end:]

    section_size = len(section_name) + len(contents)
    with open(wasm_file, 'wb') as f:
        # copy magic number and version
        f.write(file_header)
        # write the special section
        f.write(b'\0')  # user section is code 0
        f.write(toLEB(section_size))
        f.write(section_name)
        f.write(contents)
        # copy rest of binary
        f.write(file_remainder)