tools/shared.py - external/github.com/emscripten-core/emscripten - Git at Google

 # Copyright 2011 The Emscripten Authors.  All rights reserved.
 # Emscripten is available under two separate licenses, the MIT license and the
 # University of Illinois/NCSA Open Source License.  Both these licenses can be
 # found in the LICENSE file.

 """Shared code specific to emscripten.  General purpose and low-level helpers belong instead in
 utils.py."""

 import atexit
 import logging
 import os
 import re
 import shlex
 import signal
 import subprocess
 import sys
 import tempfile
 from subprocess import PIPE

 from .toolchain_profiler import ToolchainProfiler

 # We depend on python 3.10 features.  When run under an older interpreter,
 # report the problem on stderr and exit with a non-zero status.
 if sys.version_info < (3, 10): # noqa: UP036
   print(f'error: emscripten requires python 3.10 or above ({sys.executable} {sys.version})', file=sys.stderr)
   sys.exit(1)

 from . import colored_logger

 # Configure logging before importing any other local modules so that even
 # log messages emitted during import are shown as expected.
 # A non-zero EMCC_DEBUG selects DEBUG level; otherwise a non-zero EMCC_LOGGING
 # (the default is 1) selects INFO level; with both zero only errors are logged.
 DEBUG = int(os.environ.get('EMCC_DEBUG', '0'))
 EMCC_LOGGING = int(os.environ.get('EMCC_LOGGING', '1'))
 log_level = logging.ERROR
 if DEBUG:
   log_level = logging.DEBUG
 elif EMCC_LOGGING:
   log_level = logging.INFO
 # can add  %(asctime)s  to see timestamps
 logging.basicConfig(format='%(name)s:%(levelname)s: %(message)s', level=log_level)
 colored_logger.enable()

 import contextlib

 from . import cache, config, diagnostics, filelock, tempfiles, utils
 from .settings import settings
 from .utils import exe_path_from_root, exit_with_error, memoize, path_from_root, safe_ensure_dirs

 # Temp files are kept when either EMCC_DEBUG or EMCC_DEBUG_SAVE is non-zero.
 DEBUG_SAVE = DEBUG or int(os.environ.get('EMCC_DEBUG_SAVE', '0'))
 # When non-zero (EMCC_VERBOSE), print_compiler_stage echoes each sub-command.
 PRINT_SUBPROCS = int(os.getenv('EMCC_VERBOSE', '0'))
 # When true, check_call prints commands without running them.
 SKIP_SUBPROCS = False

 # Minimum node version required to run the emscripten compiler.  This is
 # distinct from the minimum version required to execute the generated code
 # (settings.MIN_NODE_VERSION).
 # This is currently set to v18 since this is the version of node available
 # in debian/stable (bookworm).  We need at least v18.3.0 because we make
 # use of util.parseArgs which was added in v18.3.0.
 MINIMUM_NODE_VERSION = (18, 3, 0)
 # Major LLVM version that check_llvm_version expects clang to report.
 EXPECTED_LLVM_VERSION = 23

 # These get set by setup_temp_dirs
 TEMP_DIR = None
 EMSCRIPTEN_TEMP_DIR = None

 logger = logging.getLogger('shared')

 # warning about absolute-paths is disabled by default, and not enabled by -Wall
 diagnostics.add_warning('absolute-paths', enabled=False, part_of_all=False)
 # unused diagnostic flags.  TODO(sbc): remove at some point
 diagnostics.add_warning('almost-asm')
 diagnostics.add_warning('experimental')
 # Don't show legacy settings warnings by default
 diagnostics.add_warning('legacy-settings', enabled=False, part_of_all=False)
 # Catch-all for other emcc warnings
 diagnostics.add_warning('linkflags')
 diagnostics.add_warning('emcc')
 diagnostics.add_warning('undefined', error=True)
 diagnostics.add_warning('deprecated', shared=True)
 diagnostics.add_warning('version-check')
 diagnostics.add_warning('export-main')
 diagnostics.add_warning('map-unrecognized-libraries')
 diagnostics.add_warning('unused-command-line-argument', shared=True)
 diagnostics.add_warning('pthreads-mem-growth')
 diagnostics.add_warning('transpile')
 diagnostics.add_warning('limited-postlink-optimizations')
 diagnostics.add_warning('em-js-i64')
 diagnostics.add_warning('js-compiler')
 diagnostics.add_warning('compatibility')
 diagnostics.add_warning('unsupported')
 diagnostics.add_warning('unused-main')
 # Closure warnings are not (yet) enabled by default
 diagnostics.add_warning('closure', enabled=False)


 def returncode_to_str(code):
   """Describe a non-zero subprocess return code for use in error messages.

   A negative code follows the `subprocess` convention of "terminated by
   signal -code" and is rendered with the signal's name when one is known.
   A positive code is reported as a plain return value.
   """
   assert code != 0
   if code < 0:
     try:
       signal_name = signal.Signals(-code).name
     except ValueError:
       # Not every number has a named member in `signal.Signals`.  Fall back to
       # the raw number instead of raising while reporting another failure.
       signal_name = f'signal {-code}'
     return f'received {signal_name} ({code})'

   return f'returned {code}'


 def run_multiple_processes(commands,
                            env=None,
                            route_stdout_to_temp_files_suffix=None,
                            cwd=None):
   """Runs multiple subprocess commands.

   At most `utils.get_num_cores()` commands are kept running at once.  If any
   command finishes with a non-zero return code, `exit_with_error` is called.

   commands : list of argument lists
     the commands to run; each one is launched with `subprocess.Popen`.
   env : dict
     environment for the subprocesses; defaults to a copy of `os.environ`.
   route_stdout_to_temp_files_suffix : string
     if set, all stdouts are instead written to files, and an array
     of filenames is returned.
   cwd : string
     working directory passed through to `subprocess.Popen`.

   Returns the list of stdout filenames, in the order of `commands`, when
   `route_stdout_to_temp_files_suffix` is set; otherwise returns None.
   """

   if env is None:
     env = os.environ.copy()

   # (command index, stdout filename) pairs, in the order processes were spawned.
   std_outs = []

   # TODO: Experiment with registering a signal handler here to see if that helps with Ctrl-C locking up the command prompt
   # when multiple child processes have been spawned.
   # import signal
   # def signal_handler(sig, frame):
   #   sys.exit(1)
   # signal.signal(signal.SIGINT, signal_handler)

   # Map containing all currently running processes.
   # command index -> proc/Popen object
   processes = {}

   def get_finished_process():
     # Returns the command index of a process that has exited, blocking until
     # there is one.  The caller removes the entry from `processes`.
     while True:
       for idx, proc in processes.items():
         if proc.poll() is not None:
           return idx
       # All processes still running; wait a short while for the first
       # (oldest) process to finish, then look again if any process has completed.
       idx, proc = next(iter(processes.items()))
       try:
         proc.communicate(timeout=0.2)
         return idx
       except subprocess.TimeoutExpired:
         pass

   num_parallel_processes = utils.get_num_cores()
   temp_files = get_temp_files()
   # `i` is the index of the next command to spawn.
   i = 0
   num_completed = 0
   while num_completed < len(commands):
     if i < len(commands) and len(processes) < num_parallel_processes:
       # Not enough parallel processes running, spawn a new one.
       if route_stdout_to_temp_files_suffix:
         stdout = temp_files.get(route_stdout_to_temp_files_suffix)
       else:
         stdout = None
       if DEBUG:
         logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i])))
       print_compiler_stage(commands[i])
       proc = subprocess.Popen(commands[i], stdout=stdout, stderr=None, env=env, cwd=cwd)
       processes[i] = proc
       if route_stdout_to_temp_files_suffix:
         std_outs.append((i, stdout.name))
       i += 1
     else:
       # Not spawning a new process (Too many commands running in parallel, or
       # no commands left): find if a process has finished.
       idx = get_finished_process()
       finished_process = processes.pop(idx)
       if finished_process.returncode != 0:
         exit_with_error('subprocess %d/%d failed (%s)! (cmdline: %s)' % (idx + 1, len(commands), returncode_to_str(finished_process.returncode), shlex.join(commands[idx])))
       num_completed += 1

   if route_stdout_to_temp_files_suffix:
     # If processes finished out of order, sort the results to the order of the input.
     std_outs.sort(key=lambda x: x[0])
     return [x[1] for x in std_outs]


 def check_call(cmd, *args, **kw):
   """Run `cmd` through `utils.run_process`, treating any failure as fatal.

   The command is first handed to `print_compiler_stage`.  When SKIP_SUBPROCS
   is set nothing is executed and 0 is returned.  Otherwise the result of
   `utils.run_process` is returned; a `subprocess.CalledProcessError` or an
   `OSError` is reported through `exit_with_error`.
   """
   print_compiler_stage(cmd)
   if SKIP_SUBPROCS:
     return 0
   try:
     result = utils.run_process(cmd, *args, **kw)
   except subprocess.CalledProcessError as err:
     exit_with_error("'%s' failed (%s)", shlex.join(cmd), returncode_to_str(err.returncode))
   except OSError as err:
     exit_with_error("'%s' failed: %s", shlex.join(cmd), err)
   else:
     return result


 def exec_process(cmd):
   """Echo `cmd` via `print_compiler_stage`, then hand it to `utils.exec`.

   NOTE(review): presumably `utils.exec` replaces the current process and does
   not return - confirm against utils.py.
   """
   print_compiler_stage(cmd)
   utils.exec(cmd)


 def run_js_tool(filename, jsargs=[], node_args=[], **kw):  # noqa: B006
   """Execute a javascript tool.

   This is used by emcc to run parts of the build process that are
   implemented in javascript.

   The command line is `config.NODE_JS + node_args + [filename] + jsargs`, and
   any extra keyword arguments are forwarded to `check_call`.  Returns the
   `stdout` attribute of the `check_call` result.

   The list defaults are only read (new lists are built by concatenation), so
   sharing them between calls is harmless here.
   """
   command = config.NODE_JS + node_args + [filename] + jsargs
   return check_call(command, **kw).stdout


 def get_npm_cmd(name, missing_ok=False):
   """Return the command (a list) that launches the npm-installed tool `name`.

   On Windows this is the `.cmd` wrapper in `node_modules/.bin`; elsewhere it is
   `config.NODE_JS` followed by the script in `node_modules/.bin`.  If the file
   does not exist, None is returned when `missing_ok` is true, otherwise the
   problem is reported through `exit_with_error`.
   """
   bin_dir = 'node_modules/.bin'
   if utils.WINDOWS:
     npm_cmd = [path_from_root(bin_dir, name + '.cmd')]
   else:
     npm_cmd = config.NODE_JS + [path_from_root(bin_dir, name)]

   # The last element is always the path of the tool itself.
   if os.path.exists(npm_cmd[-1]):
     return npm_cmd
   if missing_ok:
     return None
   exit_with_error(f'{name} was not found! Please run "npm install" in Emscripten root directory to set up npm dependencies')
   return npm_cmd


 @memoize
 def get_clang_version():
   """Return the `major.minor` version string reported by `clang --version`.

   Returns None when no version can be found in the output.  A missing clang
   executable is reported through `exit_with_error`.
   """
   if not os.path.exists(CLANG_CC):
     exit_with_error('clang executable not found at `%s`' % CLANG_CC)
   version_output = check_call([CLANG_CC, '--version'], stdout=PIPE).stdout
   match = re.search(r'[Vv]ersion\s+(\d+\.\d+)', version_output)
   if not match:
     return match
   return match.group(1)


 def check_llvm_version():
   """Check that clang reports the LLVM major version emscripten expects.

   Returns True when the version from `get_clang_version()` starts with
   EXPECTED_LLVM_VERSION.  Otherwise a 'version-check' warning is issued and
   False is returned.
   """
   actual = get_clang_version()
   accepted = [EXPECTED_LLVM_VERSION]
   # When running in CI environment we also silently allow the next major
   # version of LLVM here so that new versions of LLVM can be rolled in
   # without disruption.
   if 'BUILDBOT_BUILDNUMBER' in os.environ:
     accepted.append(EXPECTED_LLVM_VERSION + 1)
   # get_clang_version() returns None when the version could not be parsed;
   # treat that as a mismatch instead of failing on `None.startswith`.
   if actual is not None and actual.startswith(tuple('%d.' % v for v in accepted)):
     return True
   diagnostics.warning('version-check', 'LLVM version for clang executable "%s" appears incorrect (seeing "%s", expected "%s")', CLANG_CC, actual, EXPECTED_LLVM_VERSION)
   return False


 def get_clang_targets():
   """Return the text after 'Registered Targets:' in `clang -print-targets`.

   A missing clang executable, a failing command or unexpected output are each
   reported through `exit_with_error`.
   """
   marker = 'Registered Targets:'
   if not os.path.exists(CLANG_CC):
     exit_with_error('clang executable not found at `%s`' % CLANG_CC)
   try:
     target_info = utils.run_process([CLANG_CC, '-print-targets'], stdout=PIPE).stdout
   except subprocess.CalledProcessError:
     exit_with_error('error running `clang -print-targets`.  Check your llvm installation (%s)' % CLANG_CC)
   if marker not in target_info:
     exit_with_error('error parsing output of `clang -print-targets`.  Check your llvm installation (%s)' % CLANG_CC)
   sections = target_info.split(marker)
   return sections[1]


 def check_llvm():
   """Return True if clang lists a wasm32 target, else log why and return False."""
   targets = get_clang_targets()
   if 'wasm32' in targets:
     return True

   logger.critical('LLVM has not been built with the WebAssembly backend, clang reports:')
   separator = '==========================================================================='
   # Show what clang reported, framed by separator lines.
   for line in (separator, targets, separator):
     print(line, file=sys.stderr)
   return False


 def get_node_directory():
   """Return the directory part of the configured node executable's path.

   `config.NODE_JS` may be a list (command plus arguments), in which case its
   first element is used.
   """
   node = config.NODE_JS
   if type(node) is list:
     node = node[0]
   return os.path.dirname(node)


 # When we run some tools from npm (closure, html-minifier-terser), those
 # expect that the tools have node.js accessible in PATH. Place our node
 # there when invoking those tools.
 def env_with_node_in_path():
   """Return a copy of `os.environ` with the node directory first in PATH.

   If PATH is not set at all, the returned PATH contains only the node
   directory.
   """
   env = os.environ.copy()
   search_path = [get_node_directory()]
   # Only append the inherited PATH when there is one; indexing a missing
   # variable would raise KeyError.
   if 'PATH' in env:
     search_path.append(env['PATH'])
   env['PATH'] = os.pathsep.join(search_path)
   return env


 def _get_node_version_pair(nodejs):
   """Run `nodejs --version` and return `(raw_output, version_tuple)`.

   `nodejs` is the command as a list.  `raw_output` is the stripped stdout and
   `version_tuple` holds its dot-separated numeric components as ints.
   """
   raw = utils.run_process(nodejs + ['--version'], stdout=PIPE).stdout.strip()
   # Drop a leading 'v' and everything from the first '-' onwards.
   numeric, _, _ = raw.removeprefix('v').partition('-')
   return raw, tuple(int(part) for part in numeric.split('.'))


 def get_node_version(nodejs):
   """Return the version tuple of `nodejs` (see `_get_node_version_pair`)."""
   return _get_node_version_pair(nodejs)[1]


 @memoize
 def check_node_version():
   """Warn if the configured node is older than MINIMUM_NODE_VERSION.

   Returns the version tuple, or None when the version could not be determined
   (in which case a 'version-check' warning is issued instead).
   """
   try:
     actual, version = _get_node_version_pair(config.NODE_JS)
   except Exception as e:
     diagnostics.warning('version-check', 'cannot check node version: %s', e)
     return

   if version < MINIMUM_NODE_VERSION:
     # Skip the version check if we are running `bun` instead of node.
     if 'bun' not in os.path.basename(config.NODE_JS[0]):
       expected = '.'.join(map(str, MINIMUM_NODE_VERSION))
       diagnostics.warning('version-check', f'node version appears too old (seeing "{actual}", expected "v{expected}")')

   return version


 def node_reference_types_flags(nodejs):
   """Return the node flags needed to enable wasm reference types, if any."""
   version = get_node_version(nodejs)
   # reference types were enabled by default in node v18.
   needs_flag = version and version < (18, 0, 0)
   return ['--experimental-wasm-reftypes'] if needs_flag else []


 def node_exception_flags(nodejs):
   """Return the node flags needed to enable wasm exception handling, if any."""
   version = get_node_version(nodejs)
   if not version:
     return []
   # Legacy exception handling was enabled by default in node v17.
   if version < (17, 0, 0):
     return ['--experimental-wasm-eh']
   # Standard exception handling was supported behind flag in node v22.
   if version >= (22, 0, 0) and not settings.WASM_LEGACY_EXCEPTIONS:
     return ['--experimental-wasm-exnref']
   return []


 def node_pthread_flags(nodejs):
   """Return the node flags needed to enable wasm threads support, if any."""
   version = get_node_version(nodejs)
   # bulk memory and wasm threads were enabled by default in node v16.
   needs_flags = version and version < (16, 0, 0)
   if not needs_flags:
     return []
   return ['--experimental-wasm-bulk-memory', '--experimental-wasm-threads']


 @memoize
 @ToolchainProfiler.profile()
 def check_node():
   """Verify that the configured node executable can run a trivial script.

   Runs `config.NODE_JS` with `-e 'console.log("hello")'`; any exception is
   reported through `exit_with_error`.
   """
   try:
     utils.run_process(config.NODE_JS + ['-e', 'console.log("hello")'], stdout=PIPE)
   except Exception as e:
     exit_with_error('the configured node executable (%s) does not seem to work, check the paths in %s (%s)', config.NODE_JS, config.EM_CONFIG, e)


 def generate_sanity():
   """Return the expected sanity-file contents.

   This is the emscripten version and `config.LLVM_ROOT`, separated by '|' and
   followed by a newline.
   """
   return f'{utils.EMSCRIPTEN_VERSION}|{config.LLVM_ROOT}\n'


 @memoize
 def perform_sanity_checks(quiet=False):
   """Run the toolchain sanity checks, exiting via `exit_with_error` on failure.

   The node/LLVM version checks and the LLVM target check always run.  The
   remaining checks are skipped when EM_IGNORE_SANITY is set.  `quiet`
   suppresses the informational "Running sanity checks" log line.
   """
   # some warning, mostly not fatal checks - do them even if EM_IGNORE_SANITY is on
   check_node_version()
   check_llvm_version()
   llvm_ok = check_llvm()

   if os.environ.get('EM_IGNORE_SANITY'):
     logger.info('EM_IGNORE_SANITY set, ignoring sanity checks')
     return

   if not quiet:
     logger.info('(Emscripten: Running sanity checks)')

   if not llvm_ok:
     exit_with_error('failing sanity checks due to previous llvm failure')

   check_node()

   with ToolchainProfiler.profile_block('sanity LLVM'):
     for tool in (CLANG_CC, LLVM_AR):
       # .exe extension required for Windows
       candidates = (tool, tool + '.exe')
       if not any(os.path.exists(path) for path in candidates):
         exit_with_error('cannot find %s, check the paths in %s', tool, config.EM_CONFIG)


 @ToolchainProfiler.profile()
 def check_sanity(force=False, quiet=False):
   """Check that basic stuff we need (a JS engine to compile, Node.js, and Clang
   and LLVM) exists.

   The test runner always does this check (through |force|). emcc does this less
   frequently, only when ${EM_CONFIG}_sanity does not exist or is older than
   EM_CONFIG (so, we re-check sanity when the settings are changed).  We also
   re-check sanity and clear the cache when the version changes.

   force: run the checks even when EMCC_SKIP_SANITY_CHECK is '1' or the sanity
     file is up-to-date.
   quiet: passed through to `perform_sanity_checks`.
   """
   if not force and os.environ.get('EMCC_SKIP_SANITY_CHECK') == '1':
     return

   # We set EMCC_SKIP_SANITY_CHECK so that any subprocesses that we launch will
   # not re-run the tests.
   os.environ['EMCC_SKIP_SANITY_CHECK'] = '1'

   # In DEBUG mode we perform the sanity checks even when
   # early return due to the file being up-to-date.
   if DEBUG:
     force = True

   # With a frozen cache the sanity file is neither read nor written.
   if config.FROZEN_CACHE:
     if force:
       perform_sanity_checks(quiet)
     return

   if os.environ.get('EM_IGNORE_SANITY'):
     perform_sanity_checks(quiet)
     return

   expected = generate_sanity()

   sanity_file = cache.get_path('sanity.txt')

   def sanity_is_correct():
     # Returns True when the sanity file exists and matches `expected`.
     sanity_data = None
     # We can't simply check for the existence of sanity_file and then read from
     # it here because we don't hold the cache lock yet and some other process
     # could clear the cache between checking for, and reading from, the file.
     with contextlib.suppress(Exception):
       sanity_data = utils.read_file(sanity_file)
     if sanity_data == expected:
       logger.debug(f'sanity file up-to-date: {sanity_file}')
       # Even if the sanity file is up-to-date we still run the checks
       # when force is set.
       if force:
         perform_sanity_checks(quiet)
       return True # all is well
     return False

   if sanity_is_correct():
     # Early return without taking the cache lock
     return

   with cache.lock('sanity'):
     # Check again once the cache lock is acquired
     if sanity_is_correct():
       return

     # A sanity file that exists but does not match means the configuration
     # changed, so the cache is erased.
     if os.path.exists(sanity_file):
       sanity_data = utils.read_file(sanity_file)
       logger.info('old sanity: %s', sanity_data.strip())
       logger.info('new sanity: %s', expected.strip())
       logger.info('(Emscripten: config changed, clearing cache)')
       cache.erase()
     else:
       logger.debug(f'sanity file not found: {sanity_file}')

     perform_sanity_checks()

     # Only create/update this file if the sanity check succeeded, i.e., we got here
     utils.write_file(sanity_file, expected)


 def llvm_tool_path_with_suffix(tool, suffix):
   """Locate `tool` (with `-suffix` appended, if given) under `config.LLVM_ROOT`.

   The lookup itself is done by `utils.find_exe` on the user-expanded root.
   """
   name = tool + '-' + suffix if suffix else tool
   return utils.find_exe(os.path.expanduser(config.LLVM_ROOT), name)


 # Some distributions ship with multiple llvm versions so they add
 # the version to the binaries, cope with that
 def llvm_tool_path(tool):
   """Return the path of LLVM `tool`, using `config.LLVM_ADD_VERSION` as suffix."""
   return llvm_tool_path_with_suffix(tool, config.LLVM_ADD_VERSION)


 # Some distributions ship with multiple clang versions so they add
 # the version to the binaries, cope with that
 def clang_tool_path(tool):
   """Return the path of clang `tool`, using `config.CLANG_ADD_VERSION` as suffix."""
   return llvm_tool_path_with_suffix(tool, config.CLANG_ADD_VERSION)


 # In MINIMAL_RUNTIME mode, keep suffixes of generated files simple
 # ('.mem' instead of '.js.mem'; '.symbols' instead of '.js.symbols' etc)
 # Retain the original naming scheme in traditional runtime.
 def replace_or_append_suffix(filename, new_suffix):
   """Return `filename` with `new_suffix` replacing or following its suffix.

   With `settings.MINIMAL_RUNTIME` the existing suffix is replaced (via
   `utils.replace_suffix`); otherwise `new_suffix` is appended.  `new_suffix`
   must start with a '.'.
   """
   assert new_suffix[0] == '.'
   if settings.MINIMAL_RUNTIME:
     return utils.replace_suffix(filename, new_suffix)
   return filename + new_suffix


 # Temp dir. Create a random one, unless EMCC_DEBUG is set, in which case use the canonical
 # temp directory (TEMP_DIR/emscripten_temp).
 @memoize
 def get_emscripten_temp_dir():
   """Return EMSCRIPTEN_TEMP_DIR, creating a fresh directory if it is unset.

   A newly created directory is registered for deletion at exit unless
   DEBUG_SAVE is set.
   """
   global EMSCRIPTEN_TEMP_DIR
   if EMSCRIPTEN_TEMP_DIR:
     return EMSCRIPTEN_TEMP_DIR

   created = tempfile.mkdtemp(prefix='emscripten_temp_', dir=TEMP_DIR)
   EMSCRIPTEN_TEMP_DIR = created

   if not DEBUG_SAVE:
     # Capture the path in a local: the global var might change later.
     def remove_created_dir():
       utils.delete_dir(created)

     atexit.register(remove_created_dir)
   return created


 def in_temp(name):
   """Return the path of `name`'s basename inside the emscripten temp dir.

   Any directory part of `name` is dropped.
   """
   return os.path.join(get_emscripten_temp_dir(), os.path.basename(name))


 def get_canonical_temp_dir(temp_dir):
   """Return the `emscripten_temp` subdirectory path of `temp_dir`."""
   return os.path.join(temp_dir, 'emscripten_temp')


 def setup_temp_dirs():
   """Initialize the TEMP_DIR, CANONICAL_TEMP_DIR and EMSCRIPTEN_TEMP_DIR globals.

   TEMP_DIR comes from the EMCC_TEMP_DIR environment variable, falling back to
   `tempfile.gettempdir()`, and must already exist.  In DEBUG mode
   EMSCRIPTEN_TEMP_DIR is set to the canonical temp dir, which is created if
   needed and guarded by a file lock; otherwise it is left as None.
   """
   global EMSCRIPTEN_TEMP_DIR, CANONICAL_TEMP_DIR, TEMP_DIR
   EMSCRIPTEN_TEMP_DIR = None

   TEMP_DIR = os.environ.get("EMCC_TEMP_DIR", tempfile.gettempdir())
   if not os.path.isdir(TEMP_DIR):
     exit_with_error(f'The temporary directory `{TEMP_DIR}` does not exist! Please make sure that the path is correct.')

   CANONICAL_TEMP_DIR = get_canonical_temp_dir(TEMP_DIR)

   if DEBUG:
     EMSCRIPTEN_TEMP_DIR = CANONICAL_TEMP_DIR
     try:
       safe_ensure_dirs(EMSCRIPTEN_TEMP_DIR)
     except Exception as e:
       exit_with_error('error creating canonical temp dir (Check definition of TEMP_DIR in %s): %s', config.EM_CONFIG, e)

     # Since the canonical temp directory is, by definition, the same
     # between all processes that run in DEBUG mode we need to use a multi
     # process lock to prevent more than one process from writing to it.
     # This is because emcc assumes that it can use non-unique names inside
     # the temp directory.
     # Sadly we need to allow child processes to access this directory
     # though, since emcc can recursively call itself when building
     # libraries and ports.
     if 'EM_HAVE_TEMP_DIR_LOCK' not in os.environ:
       filelock_name = os.path.join(EMSCRIPTEN_TEMP_DIR, 'emscripten.lock')
       lock = filelock.FileLock(filelock_name)
       # Child processes inherit this variable and so skip taking the lock.
       os.environ['EM_HAVE_TEMP_DIR_LOCK'] = '1'
       lock.acquire()
       # The lock is released when this process exits.
       atexit.register(lock.release)


 @memoize
 def get_temp_files():
   """Return the `tempfiles.TempFiles` instance used for temporary files."""
   if not DEBUG_SAVE:
     # Use the system tempdir and try to clean up after ourselves.
     return tempfiles.TempFiles(TEMP_DIR, save_debug_files=False)
   # In debug mode store all temp files in the emscripten-specific temp dir
   # and don't worry about cleaning them up.
   return tempfiles.TempFiles(get_emscripten_temp_dir(), save_debug_files=True)


 def print_compiler_stage(cmd):
   """Print the sub-command `cmd` to stderr, like clang/gcc's '-v/-###' flags.

   Nothing is printed unless SKIP_SUBPROCS or PRINT_SUBPROCS is set.  The two
   modes quote the arguments differently.
   """

   def quote_if_needed(arg):
     # Arguments made only of alphanumerics and './-_' are printed bare.
     plain = all(ch.isalnum() or ch in './-_' for ch in arg)
     return arg if plain else f'"{arg}"'

   if SKIP_SUBPROCS:
     line = ' ' + ' '.join([quote_if_needed(a) for a in cmd])
   elif PRINT_SUBPROCS:
     line = ' %s %s' % (quote_if_needed(cmd[0]), shlex.join(cmd[1:]))
   else:
     return
   print(line, file=sys.stderr)
   sys.stderr.flush()


 def demangle_c_symbol_name(name):
   """Strip the leading '_' from a C symbol; prefix any other name with '$'."""
   if is_c_symbol(name):
     return name.removeprefix('_')
   return '$' + name


 def is_c_symbol(name):
   """Return True if `name` starts with an underscore."""
   return name.startswith('_')


 def is_internal_global(name):
   """Return True if `name` is one of the internal em_asm/em_js/em_lib_deps symbols.

   A name matches if it is one of the fixed start/stop symbols or if it begins
   with one of the internal prefixes.
   """
   exact_names = {
     '__start_em_asm', '__stop_em_asm',
     '__start_em_js', '__stop_em_js',
     '__start_em_lib_deps', '__stop_em_lib_deps',
     '__em_lib_deps',
   }
   prefixes = ('__em_js__', '__em_lib_deps')
   if name in exact_names:
     return True
   return name.startswith(prefixes)


 def is_user_export(name):
   """Return True unless `name` is an internal global or a known runtime export."""
   if is_internal_global(name):
     return False
   if name in ['__asyncify_data', '__asyncify_state', '__indirect_function_table', 'memory']:
     return False
   return not name.startswith(('dynCall_', 'orig$'))


 def asmjs_mangle(name):
   """Return the JS-side name for the symbol `name`.

   User exports (see `is_user_export`) get a leading '_'; any other name is
   returned unchanged.
   """
   # We also use this function to convert the clang-mangled `__main_argc_argv`
   # to simply `main` which is expected by the emscripten JS glue code.
   js_name = 'main' if name == '__main_argc_argv' else name
   return '_' + js_name if is_user_export(js_name) else js_name


 def do_replace(input_, pattern, replacement):
   """Replace every `pattern` in `input_` with `replacement`.

   A missing pattern is reported through `exit_with_error`.
   """
   found = pattern in input_
   if not found:
     exit_with_error('expected to find pattern in input JS: %s' % pattern)
   return input_.replace(pattern, replacement)


 def get_llvm_target():
   """Return the LLVM target triple, wasm64 when `settings.MEMORY64` is set."""
   return 'wasm64-unknown-emscripten' if settings.MEMORY64 else 'wasm32-unknown-emscripten'


 def init():
   """Initialize module state: set the version globals and set up the temp dirs."""
   utils.set_version_globals()
   setup_temp_dirs()


 # ============================================================================
 # End declarations.
 # ============================================================================

 # Everything below this point is top level code that gets run when importing this
 # file.  TODO(sbc): We should try to reduce the amount we do here and instead
 # have consumers explicitly call initialization functions.

 # Paths of the clang/LLVM tools, located under config.LLVM_ROOT by
 # clang_tool_path / llvm_tool_path.
 CLANG_CC = clang_tool_path('clang')
 CLANG_CXX = clang_tool_path('clang++')
 CLANG_SCAN_DEPS = llvm_tool_path('clang-scan-deps')
 LLVM_AR = llvm_tool_path('llvm-ar')
 LLVM_DWP = llvm_tool_path('llvm-dwp')
 LLVM_RANLIB = llvm_tool_path('llvm-ranlib')
 LLVM_NM = llvm_tool_path('llvm-nm')
 LLVM_DWARFDUMP = llvm_tool_path('llvm-dwarfdump')
 LLVM_OBJCOPY = llvm_tool_path('llvm-objcopy')
 LLVM_STRIP = llvm_tool_path('llvm-strip')
 WASM_LD = llvm_tool_path('wasm-ld')
 LLVM_PROFDATA = llvm_tool_path('llvm-profdata')
 LLVM_COV = llvm_tool_path('llvm-cov')

 # Paths of emscripten's own tools, resolved with exe_path_from_root.
 EMCC = exe_path_from_root('emcc')
 EMXX = exe_path_from_root('em++')
 EMAR = exe_path_from_root('emar')
 EMRANLIB = exe_path_from_root('emranlib')
 FILE_PACKAGER = exe_path_from_root('tools/file_packager')
 # Windows .dll suffix is not included in this list, since those are never
 # linked to directly on the command line.
 DYLIB_EXTENSIONS = ['.dylib', '.so']

 # NOTE(review): presumably set to True by the em++ entry point - confirm
 # against the callers.
 run_via_emxx = False

 # Sets the version globals and the temp dir globals at import time.
 init()