blob: 97cc86a88602cc2fc62d145990bbe644420d6439 [file] [log] [blame] [edit]
# Copyright 2011 The Emscripten Authors. All rights reserved.
# Emscripten is available under two separate licenses, the MIT license and the
# University of Illinois/NCSA Open Source License. Both these licenses can be
# found in the LICENSE file.
"""Shared code specific to emscripten. General purpose and low-level helpers belong instead in
utils.py."""
import atexit
import logging
import os
import re
import shlex
import signal
import subprocess
import sys
import tempfile
from subprocess import PIPE
from .toolchain_profiler import ToolchainProfiler
# We depend on python 3.10 features
if sys.version_info < (3, 10): # noqa: UP036
print(f'error: emscripten requires python 3.10 or above ({sys.executable} {sys.version})', file=sys.stderr)
sys.exit(1)
from . import colored_logger
# Configure logging before importing any other local modules so even
# log message during import are shown as expected.
DEBUG = int(os.environ.get('EMCC_DEBUG', '0'))
EMCC_LOGGING = int(os.environ.get('EMCC_LOGGING', '1'))
log_level = logging.ERROR
if DEBUG:
log_level = logging.DEBUG
elif EMCC_LOGGING:
log_level = logging.INFO
# can add %(asctime)s to see timestamps
logging.basicConfig(format='%(name)s:%(levelname)s: %(message)s', level=log_level)
colored_logger.enable()
import contextlib
from . import cache, config, diagnostics, filelock, tempfiles, utils
from .settings import settings
from .utils import exe_path_from_root, exit_with_error, memoize, path_from_root, safe_ensure_dirs
DEBUG_SAVE = DEBUG or int(os.environ.get('EMCC_DEBUG_SAVE', '0'))
PRINT_SUBPROCS = int(os.getenv('EMCC_VERBOSE', '0'))
SKIP_SUBPROCS = False
# Minimum node version required to run the emscripten compiler. This is
# distinct from the minimum version required to execute the generated code
# (settings.MIN_NODE_VERSION).
# This is currently set to v18 since this is the version of node available
# in debian/stable (bookworm). We need at least v18.3.0 because we make
# use of util.parseArg which was added in v18.3.0.
MINIMUM_NODE_VERSION = (18, 3, 0)
EXPECTED_LLVM_VERSION = 23
# These get set by setup_temp_dirs
TEMP_DIR = None
EMSCRIPTEN_TEMP_DIR = None
logger = logging.getLogger('shared')
# warning about absolute-paths is disabled by default, and not enabled by -Wall
diagnostics.add_warning('absolute-paths', enabled=False, part_of_all=False)
# unused diagnostic flags. TODO(sbc): remove at some point
diagnostics.add_warning('almost-asm')
diagnostics.add_warning('experimental')
# Don't show legacy settings warnings by default
diagnostics.add_warning('legacy-settings', enabled=False, part_of_all=False)
# Catch-all for other emcc warnings
diagnostics.add_warning('linkflags')
diagnostics.add_warning('emcc')
diagnostics.add_warning('undefined', error=True)
diagnostics.add_warning('deprecated', shared=True)
diagnostics.add_warning('version-check')
diagnostics.add_warning('export-main')
diagnostics.add_warning('map-unrecognized-libraries')
diagnostics.add_warning('unused-command-line-argument', shared=True)
diagnostics.add_warning('pthreads-mem-growth')
diagnostics.add_warning('transpile')
diagnostics.add_warning('limited-postlink-optimizations')
diagnostics.add_warning('em-js-i64')
diagnostics.add_warning('js-compiler')
diagnostics.add_warning('compatibility')
diagnostics.add_warning('unsupported')
diagnostics.add_warning('unused-main')
# Closure warning are not (yet) enabled by default
diagnostics.add_warning('closure', enabled=False)
def returncode_to_str(code):
assert code != 0
if code < 0:
signal_name = signal.Signals(-code).name
return f'received {signal_name} ({code})'
return f'returned {code}'
def run_multiple_processes(commands,
env=None,
route_stdout_to_temp_files_suffix=None,
cwd=None):
"""Runs multiple subprocess commands.
route_stdout_to_temp_files_suffix : string
if not None, all stdouts are instead written to files, and an array
of filenames is returned.
"""
if env is None:
env = os.environ.copy()
std_outs = []
# TODO: Experiment with registering a signal handler here to see if that helps with Ctrl-C locking up the command prompt
# when multiple child processes have been spawned.
# import signal
# def signal_handler(sig, frame):
# sys.exit(1)
# signal.signal(signal.SIGINT, signal_handler)
# Map containing all currently running processes.
# command index -> proc/Popen object
processes = {}
def get_finished_process():
while True:
for idx, proc in processes.items():
if proc.poll() is not None:
return idx
# All processes still running; wait a short while for the first
# (oldest) process to finish, then look again if any process has completed.
idx, proc = next(iter(processes.items()))
try:
proc.communicate(timeout=0.2)
return idx
except subprocess.TimeoutExpired:
pass
num_parallel_processes = utils.get_num_cores()
temp_files = get_temp_files()
i = 0
num_completed = 0
while num_completed < len(commands):
if i < len(commands) and len(processes) < num_parallel_processes:
# Not enough parallel processes running, spawn a new one.
if route_stdout_to_temp_files_suffix:
stdout = temp_files.get(route_stdout_to_temp_files_suffix)
else:
stdout = None
if DEBUG:
logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i])))
print_compiler_stage(commands[i])
proc = subprocess.Popen(commands[i], stdout=stdout, stderr=None, env=env, cwd=cwd)
processes[i] = proc
if route_stdout_to_temp_files_suffix:
std_outs.append((i, stdout.name))
i += 1
else:
# Not spawning a new process (Too many commands running in parallel, or
# no commands left): find if a process has finished.
idx = get_finished_process()
finished_process = processes.pop(idx)
if finished_process.returncode != 0:
exit_with_error('subprocess %d/%d failed (%s)! (cmdline: %s)' % (idx + 1, len(commands), returncode_to_str(finished_process.returncode), shlex.join(commands[idx])))
num_completed += 1
if route_stdout_to_temp_files_suffix:
# If processes finished out of order, sort the results to the order of the input.
std_outs.sort(key=lambda x: x[0])
return [x[1] for x in std_outs]
def check_call(cmd, *args, **kw):
"""Like `run_process` above but treat failures as fatal and exit_with_error."""
print_compiler_stage(cmd)
if SKIP_SUBPROCS:
return 0
try:
return utils.run_process(cmd, *args, **kw)
except subprocess.CalledProcessError as e:
exit_with_error("'%s' failed (%s)", shlex.join(cmd), returncode_to_str(e.returncode))
except OSError as e:
exit_with_error("'%s' failed: %s", shlex.join(cmd), e)
def exec_process(cmd):
print_compiler_stage(cmd)
utils.exec(cmd)
def run_js_tool(filename, jsargs=[], node_args=[], **kw): # noqa: B006
"""Execute a javascript tool.
This is used by emcc to run parts of the build process that are
implemented in javascript.
"""
command = config.NODE_JS + node_args + [filename] + jsargs
return check_call(command, **kw).stdout
def get_npm_cmd(name, missing_ok=False):
if utils.WINDOWS:
cmd = [path_from_root('node_modules/.bin', name + '.cmd')]
else:
cmd = config.NODE_JS + [path_from_root('node_modules/.bin', name)]
if not os.path.exists(cmd[-1]):
if missing_ok:
return None
else:
exit_with_error(f'{name} was not found! Please run "npm install" in Emscripten root directory to set up npm dependencies')
return cmd
@memoize
def get_clang_version():
if not os.path.exists(CLANG_CC):
exit_with_error('clang executable not found at `%s`' % CLANG_CC)
proc = check_call([CLANG_CC, '--version'], stdout=PIPE)
m = re.search(r'[Vv]ersion\s+(\d+\.\d+)', proc.stdout)
return m and m.group(1)
def check_llvm_version():
actual = get_clang_version()
if actual.startswith('%d.' % EXPECTED_LLVM_VERSION):
return True
# When running in CI environment we also silently allow the next major
# version of LLVM here so that new versions of LLVM can be rolled in
# without disruption.
if 'BUILDBOT_BUILDNUMBER' in os.environ:
if actual.startswith('%d.' % (EXPECTED_LLVM_VERSION + 1)):
return True
diagnostics.warning('version-check', 'LLVM version for clang executable "%s" appears incorrect (seeing "%s", expected "%s")', CLANG_CC, actual, EXPECTED_LLVM_VERSION)
return False
def get_clang_targets():
if not os.path.exists(CLANG_CC):
exit_with_error('clang executable not found at `%s`' % CLANG_CC)
try:
target_info = utils.run_process([CLANG_CC, '-print-targets'], stdout=PIPE).stdout
except subprocess.CalledProcessError:
exit_with_error('error running `clang -print-targets`. Check your llvm installation (%s)' % CLANG_CC)
if 'Registered Targets:' not in target_info:
exit_with_error('error parsing output of `clang -print-targets`. Check your llvm installation (%s)' % CLANG_CC)
return target_info.split('Registered Targets:')[1]
def check_llvm():
targets = get_clang_targets()
if 'wasm32' not in targets:
logger.critical('LLVM has not been built with the WebAssembly backend, clang reports:')
print('===========================================================================', file=sys.stderr)
print(targets, file=sys.stderr)
print('===========================================================================', file=sys.stderr)
return False
return True
def get_node_directory():
return os.path.dirname(config.NODE_JS[0] if type(config.NODE_JS) is list else config.NODE_JS)
# When we run some tools from npm (closure, html-minifier-terser), those
# expect that the tools have node.js accessible in PATH. Place our node
# there when invoking those tools.
def env_with_node_in_path():
env = os.environ.copy()
env['PATH'] = get_node_directory() + os.pathsep + env['PATH']
return env
def _get_node_version_pair(nodejs):
actual = utils.run_process(nodejs + ['--version'], stdout=PIPE).stdout.strip()
version = actual.removeprefix('v')
version = version.split('-')[0].split('.')
version = tuple(int(v) for v in version)
return actual, version
def get_node_version(nodejs):
return _get_node_version_pair(nodejs)[1]
@memoize
def check_node_version():
try:
actual, version = _get_node_version_pair(config.NODE_JS)
except Exception as e:
diagnostics.warning('version-check', 'cannot check node version: %s', e)
return
# Skip the version check is we are running `bun` instead of node.
if version < MINIMUM_NODE_VERSION and 'bun' not in os.path.basename(config.NODE_JS[0]):
expected = '.'.join(str(v) for v in MINIMUM_NODE_VERSION)
diagnostics.warning('version-check', f'node version appears too old (seeing "{actual}", expected "v{expected}")')
return version
def node_reference_types_flags(nodejs):
node_version = get_node_version(nodejs)
# reference types were enabled by default in node v18.
if node_version and node_version < (18, 0, 0):
return ['--experimental-wasm-reftypes']
else:
return []
def node_exception_flags(nodejs):
node_version = get_node_version(nodejs)
# Legacy exception handling was enabled by default in node v17.
if node_version and node_version < (17, 0, 0):
return ['--experimental-wasm-eh']
# Standard exception handling was supported behind flag in node v22.
if node_version and node_version >= (22, 0, 0) and not settings.WASM_LEGACY_EXCEPTIONS:
return ['--experimental-wasm-exnref']
return []
def node_pthread_flags(nodejs):
node_version = get_node_version(nodejs)
# bulk memory and wasm threads were enabled by default in node v16.
if node_version and node_version < (16, 0, 0):
return ['--experimental-wasm-bulk-memory', '--experimental-wasm-threads']
else:
return []
@memoize
@ToolchainProfiler.profile()
def check_node():
try:
utils.run_process(config.NODE_JS + ['-e', 'console.log("hello")'], stdout=PIPE)
except Exception as e:
exit_with_error('the configured node executable (%s) does not seem to work, check the paths in %s (%s)', config.NODE_JS, config.EM_CONFIG, e)
def generate_sanity():
return f'{utils.EMSCRIPTEN_VERSION}|{config.LLVM_ROOT}\n'
@memoize
def perform_sanity_checks(quiet=False):
# some warning, mostly not fatal checks - do them even if EM_IGNORE_SANITY is on
check_node_version()
check_llvm_version()
llvm_ok = check_llvm()
if os.environ.get('EM_IGNORE_SANITY'):
logger.info('EM_IGNORE_SANITY set, ignoring sanity checks')
return
if not quiet:
logger.info('(Emscripten: Running sanity checks)')
if not llvm_ok:
exit_with_error('failing sanity checks due to previous llvm failure')
check_node()
with ToolchainProfiler.profile_block('sanity LLVM'):
for cmd in (CLANG_CC, LLVM_AR):
if not os.path.exists(cmd) and not os.path.exists(cmd + '.exe'): # .exe extension required for Windows
exit_with_error('cannot find %s, check the paths in %s', cmd, config.EM_CONFIG)
@ToolchainProfiler.profile()
def check_sanity(force=False, quiet=False):
"""Check that basic stuff we need (a JS engine to compile, Node.js, and Clang
and LLVM) exists.
The test runner always does this check (through |force|). emcc does this less
frequently, only when ${EM_CONFIG}_sanity does not exist or is older than
EM_CONFIG (so, we re-check sanity when the settings are changed). We also
re-check sanity and clear the cache when the version changes.
"""
if not force and os.environ.get('EMCC_SKIP_SANITY_CHECK') == '1':
return
# We set EMCC_SKIP_SANITY_CHECK so that any subprocesses that we launch will
# not re-run the tests.
os.environ['EMCC_SKIP_SANITY_CHECK'] = '1'
# In DEBUG mode we perform the sanity checks even when
# early return due to the file being up-to-date.
if DEBUG:
force = True
if config.FROZEN_CACHE:
if force:
perform_sanity_checks(quiet)
return
if os.environ.get('EM_IGNORE_SANITY'):
perform_sanity_checks(quiet)
return
expected = generate_sanity()
sanity_file = cache.get_path('sanity.txt')
def sanity_is_correct():
sanity_data = None
# We can't simply check for the existence of sanity_file and then read from
# it here because we don't hold the cache lock yet and some other process
# could clear the cache between checking for, and reading from, the file.
with contextlib.suppress(Exception):
sanity_data = utils.read_file(sanity_file)
if sanity_data == expected:
logger.debug(f'sanity file up-to-date: {sanity_file}')
# Even if the sanity file is up-to-date we still run the checks
# when force is set.
if force:
perform_sanity_checks(quiet)
return True # all is well
return False
if sanity_is_correct():
# Early return without taking the cache lock
return
with cache.lock('sanity'):
# Check again once the cache lock as acquired
if sanity_is_correct():
return
if os.path.exists(sanity_file):
sanity_data = utils.read_file(sanity_file)
logger.info('old sanity: %s', sanity_data.strip())
logger.info('new sanity: %s', expected.strip())
logger.info('(Emscripten: config changed, clearing cache)')
cache.erase()
else:
logger.debug(f'sanity file not found: {sanity_file}')
perform_sanity_checks()
# Only create/update this file if the sanity check succeeded, i.e., we got here
utils.write_file(sanity_file, expected)
def llvm_tool_path_with_suffix(tool, suffix):
if suffix:
tool += '-' + suffix
llvm_root = os.path.expanduser(config.LLVM_ROOT)
return utils.find_exe(llvm_root, tool)
# Some distributions ship with multiple llvm versions so they add
# the version to the binaries, cope with that
def llvm_tool_path(tool):
return llvm_tool_path_with_suffix(tool, config.LLVM_ADD_VERSION)
# Some distributions ship with multiple clang versions so they add
# the version to the binaries, cope with that
def clang_tool_path(tool):
return llvm_tool_path_with_suffix(tool, config.CLANG_ADD_VERSION)
# In MINIMAL_RUNTIME mode, keep suffixes of generated files simple
# ('.mem' instead of '.js.mem'; .'symbols' instead of '.js.symbols' etc)
# Retain the original naming scheme in traditional runtime.
def replace_or_append_suffix(filename, new_suffix):
assert new_suffix[0] == '.'
return utils.replace_suffix(filename, new_suffix) if settings.MINIMAL_RUNTIME else filename + new_suffix
# Temp dir. Create a random one, unless EMCC_DEBUG is set, in which case use the canonical
# temp directory (TEMP_DIR/emscripten_temp).
@memoize
def get_emscripten_temp_dir():
"""Returns a path to EMSCRIPTEN_TEMP_DIR, creating one if it didn't exist."""
global EMSCRIPTEN_TEMP_DIR
if not EMSCRIPTEN_TEMP_DIR:
EMSCRIPTEN_TEMP_DIR = tempfile.mkdtemp(prefix='emscripten_temp_', dir=TEMP_DIR)
if not DEBUG_SAVE:
def prepare_to_clean_temp(d):
def clean_temp():
utils.delete_dir(d)
atexit.register(clean_temp)
# this global var might change later
prepare_to_clean_temp(EMSCRIPTEN_TEMP_DIR)
return EMSCRIPTEN_TEMP_DIR
def in_temp(name):
return os.path.join(get_emscripten_temp_dir(), os.path.basename(name))
def get_canonical_temp_dir(temp_dir):
return os.path.join(temp_dir, 'emscripten_temp')
def setup_temp_dirs():
global EMSCRIPTEN_TEMP_DIR, CANONICAL_TEMP_DIR, TEMP_DIR
EMSCRIPTEN_TEMP_DIR = None
TEMP_DIR = os.environ.get("EMCC_TEMP_DIR", tempfile.gettempdir())
if not os.path.isdir(TEMP_DIR):
exit_with_error(f'The temporary directory `{TEMP_DIR}` does not exist! Please make sure that the path is correct.')
CANONICAL_TEMP_DIR = get_canonical_temp_dir(TEMP_DIR)
if DEBUG:
EMSCRIPTEN_TEMP_DIR = CANONICAL_TEMP_DIR
try:
safe_ensure_dirs(EMSCRIPTEN_TEMP_DIR)
except Exception as e:
exit_with_error('error creating canonical temp dir (Check definition of TEMP_DIR in %s): %s', config.EM_CONFIG, e)
# Since the canonical temp directory is, by definition, the same
# between all processes that run in DEBUG mode we need to use a multi
# process lock to prevent more than one process from writing to it.
# This is because emcc assumes that it can use non-unique names inside
# the temp directory.
# Sadly we need to allow child processes to access this directory
# though, since emcc can recursively call itself when building
# libraries and ports.
if 'EM_HAVE_TEMP_DIR_LOCK' not in os.environ:
filelock_name = os.path.join(EMSCRIPTEN_TEMP_DIR, 'emscripten.lock')
lock = filelock.FileLock(filelock_name)
os.environ['EM_HAVE_TEMP_DIR_LOCK'] = '1'
lock.acquire()
atexit.register(lock.release)
@memoize
def get_temp_files():
if DEBUG_SAVE:
# In debug mode store all temp files in the emscripten-specific temp dir
# and don't worry about cleaning them up.
return tempfiles.TempFiles(get_emscripten_temp_dir(), save_debug_files=True)
else:
# Otherwise use the system tempdir and try to clean up after ourselves.
return tempfiles.TempFiles(TEMP_DIR, save_debug_files=False)
def print_compiler_stage(cmd):
"""Emulate the '-v/-###' flags of clang/gcc by printing the sub-commands
that we run."""
def maybe_quote(arg):
if all(c.isalnum() or c in './-_' for c in arg):
return arg
else:
return f'"{arg}"'
if SKIP_SUBPROCS:
print(' ' + ' '.join([maybe_quote(a) for a in cmd]), file=sys.stderr)
sys.stderr.flush()
elif PRINT_SUBPROCS:
print(' %s %s' % (maybe_quote(cmd[0]), shlex.join(cmd[1:])), file=sys.stderr)
sys.stderr.flush()
def demangle_c_symbol_name(name):
if not is_c_symbol(name):
return '$' + name
return name[1:] if name.startswith('_') else name
def is_c_symbol(name):
return name.startswith('_')
def is_internal_global(name):
internal_start_stop_symbols = {'__start_em_asm', '__stop_em_asm',
'__start_em_js', '__stop_em_js',
'__start_em_lib_deps', '__stop_em_lib_deps',
'__em_lib_deps'}
internal_prefixes = ('__em_js__', '__em_lib_deps')
return name in internal_start_stop_symbols or any(name.startswith(p) for p in internal_prefixes)
def is_user_export(name):
if is_internal_global(name):
return False
return name not in ['__asyncify_data', '__asyncify_state', '__indirect_function_table', 'memory'] and not name.startswith(('dynCall_', 'orig$'))
def asmjs_mangle(name):
"""Mangle a name the way asm.js/JSBackend globals are mangled.
Prepends '_' and replaces non-alphanumerics with '_'.
Used by wasm backend for JS library consistency with asm.js.
"""
# We also use this function to convert the clang-mangled `__main_argc_argv`
# to simply `main` which is expected by the emscripten JS glue code.
if name == '__main_argc_argv':
name = 'main'
if is_user_export(name):
return '_' + name
return name
def do_replace(input_, pattern, replacement):
if pattern not in input_:
exit_with_error('expected to find pattern in input JS: %s' % pattern)
return input_.replace(pattern, replacement)
def get_llvm_target():
if settings.MEMORY64:
return 'wasm64-unknown-emscripten'
else:
return 'wasm32-unknown-emscripten'
def init():
utils.set_version_globals()
setup_temp_dirs()
# ============================================================================
# End declarations.
# ============================================================================
# Everything below this point is top level code that get run when importing this
# file. TODO(sbc): We should try to reduce that amount we do here and instead
# have consumers explicitly call initialization functions.
CLANG_CC = clang_tool_path('clang')
CLANG_CXX = clang_tool_path('clang++')
CLANG_SCAN_DEPS = llvm_tool_path('clang-scan-deps')
LLVM_AR = llvm_tool_path('llvm-ar')
LLVM_DWP = llvm_tool_path('llvm-dwp')
LLVM_RANLIB = llvm_tool_path('llvm-ranlib')
LLVM_NM = llvm_tool_path('llvm-nm')
LLVM_DWARFDUMP = llvm_tool_path('llvm-dwarfdump')
LLVM_OBJCOPY = llvm_tool_path('llvm-objcopy')
LLVM_STRIP = llvm_tool_path('llvm-strip')
WASM_LD = llvm_tool_path('wasm-ld')
LLVM_PROFDATA = llvm_tool_path('llvm-profdata')
LLVM_COV = llvm_tool_path('llvm-cov')
EMCC = exe_path_from_root('emcc')
EMXX = exe_path_from_root('em++')
EMAR = exe_path_from_root('emar')
EMRANLIB = exe_path_from_root('emranlib')
FILE_PACKAGER = exe_path_from_root('tools/file_packager')
# Windows .dll suffix is not included in this list, since those are never
# linked to directly on the command line.
DYLIB_EXTENSIONS = ['.dylib', '.so']
run_via_emxx = False
init()