| ############################################################################### |
| # Server process to keep track of unlinked resources (like shared memory |
| # segments, semaphores etc.) and clean them. |
| # |
| # On Unix we run a server process which keeps track of unlinked |
| # resources. The server ignores SIGINT and SIGTERM and reads from a |
| # pipe. Every other process of the program has a copy of the writable |
| # end of the pipe, so we get EOF when all other processes have exited. |
| # Then the server process unlinks any remaining resource names. |
| # |
| # This is important because there may be system limits for such resources: for |
| # instance, the system only supports a limited number of named semaphores, and |
| # shared-memory segments live in the RAM. If a python process leaks such a |
| # resource, this resource will not be removed till the next reboot. Without |
| # this resource tracker process, "killall python" would probably leave unlinked |
| # resources. |
| |
| import os |
| import signal |
| import sys |
| import threading |
| import warnings |
| from collections import deque |
| |
| from . import spawn |
| from . import util |
| |
# Names exported by ``from ... import *``; each one is bound further down in
# this module to a method of the module-level tracker instance.
__all__ = [
    'ensure_running',
    'register',
    'unregister',
]

# Signals that are ignored by the tracker process (see main()) and blocked
# around the spawn of that process (see ResourceTracker._launch()).
_IGNORED_SIGNALS = (signal.SIGINT, signal.SIGTERM)
# signal.pthread_sigmask() is not provided on every platform, so its use is
# guarded by this flag.
_HAVE_SIGMASK = hasattr(signal, 'pthread_sigmask')
| |
def cleanup_noop(name):
    '''Cleanup callback for the 'noop' resource type; it always raises.

    The 'noop' type only exists so that PROBE messages carry a known
    resource type. Nothing may ever be registered under it, so reaching
    this function is an error.

    Raises RuntimeError unconditionally; *name* is ignored.
    '''
    error = RuntimeError('noop should never be registered or cleaned up')
    raise error
| |
# Maps a resource type name to the callable that destroys a leaked resource
# of that type; the callable receives the resource name.
_CLEANUP_FUNCS = {
    'noop': cleanup_noop,
    'dummy': lambda name: None,  # Dummy resource used in tests
}

if os.name == 'posix':
    import _multiprocessing
    import _posixshmem

    # Named semaphores are cleaned up with sem_unlink().
    #
    # The Python build may have detected that POSIX named semaphores are
    # absent, in which case sem_unlink() is missing. No named semaphore can
    # have been opened then, so there is nothing to clean up either.
    if hasattr(_multiprocessing, 'sem_unlink'):
        _CLEANUP_FUNCS.update(semaphore=_multiprocessing.sem_unlink)
    _CLEANUP_FUNCS.update(shared_memory=_posixshmem.shm_unlink)
| |
class ReentrantCallError(RuntimeError):
    '''Raised when the resource tracker is re-entered on the same thread.

    ResourceTracker._reentrant_call_error() raises it when a call into the
    tracker is detected while another call is still holding the tracker's
    lock (for instance from a finalizer run by the garbage collector).
    '''
| |
| |
| class ResourceTracker(object): |
| |
| def __init__(self): |
| self._lock = threading.RLock() |
| self._fd = None |
| self._pid = None |
| self._exitcode = None |
| self._reentrant_messages = deque() |
| |
| def _reentrant_call_error(self): |
| # gh-109629: this happens if an explicit call to the ResourceTracker |
| # gets interrupted by a garbage collection, invoking a finalizer (*) |
| # that itself calls back into ResourceTracker. |
| # (*) for example the SemLock finalizer |
| raise ReentrantCallError( |
| "Reentrant call into the multiprocessing resource tracker") |
| |
| def __del__(self): |
| # making sure child processess are cleaned before ResourceTracker |
| # gets destructed. |
| # see https://github.com/python/cpython/issues/88887 |
| self._stop(use_blocking_lock=False) |
| |
| def _stop(self, use_blocking_lock=True): |
| if use_blocking_lock: |
| with self._lock: |
| self._stop_locked() |
| else: |
| acquired = self._lock.acquire(blocking=False) |
| try: |
| self._stop_locked() |
| finally: |
| if acquired: |
| self._lock.release() |
| |
| def _stop_locked( |
| self, |
| close=os.close, |
| waitpid=os.waitpid, |
| waitstatus_to_exitcode=os.waitstatus_to_exitcode, |
| ): |
| # This shouldn't happen (it might when called by a finalizer) |
| # so we check for it anyway. |
| if self._lock._recursion_count() > 1: |
| raise self._reentrant_call_error() |
| if self._fd is None: |
| # not running |
| return |
| if self._pid is None: |
| return |
| |
| # closing the "alive" file descriptor stops main() |
| close(self._fd) |
| self._fd = None |
| |
| _, status = waitpid(self._pid, 0) |
| |
| self._pid = None |
| |
| try: |
| self._exitcode = waitstatus_to_exitcode(status) |
| except ValueError: |
| # os.waitstatus_to_exitcode may raise an exception for invalid values |
| self._exitcode = None |
| |
| def getfd(self): |
| self.ensure_running() |
| return self._fd |
| |
| def ensure_running(self): |
| '''Make sure that resource tracker process is running. |
| |
| This can be run from any process. Usually a child process will use |
| the resource created by its parent.''' |
| return self._ensure_running_and_write() |
| |
| def _teardown_dead_process(self): |
| os.close(self._fd) |
| |
| # Clean-up to avoid dangling processes. |
| try: |
| # _pid can be None if this process is a child from another |
| # python process, which has started the resource_tracker. |
| if self._pid is not None: |
| os.waitpid(self._pid, 0) |
| except ChildProcessError: |
| # The resource_tracker has already been terminated. |
| pass |
| self._fd = None |
| self._pid = None |
| self._exitcode = None |
| |
| warnings.warn('resource_tracker: process died unexpectedly, ' |
| 'relaunching. Some resources might leak.') |
| |
| def _launch(self): |
| fds_to_pass = [] |
| try: |
| fds_to_pass.append(sys.stderr.fileno()) |
| except Exception: |
| pass |
| r, w = os.pipe() |
| try: |
| fds_to_pass.append(r) |
| # process will out live us, so no need to wait on pid |
| exe = spawn.get_executable() |
| args = [ |
| exe, |
| *util._args_from_interpreter_flags(), |
| '-c', |
| f'from multiprocessing.resource_tracker import main;main({r})', |
| ] |
| # bpo-33613: Register a signal mask that will block the signals. |
| # This signal mask will be inherited by the child that is going |
| # to be spawned and will protect the child from a race condition |
| # that can make the child die before it registers signal handlers |
| # for SIGINT and SIGTERM. The mask is unregistered after spawning |
| # the child. |
| prev_sigmask = None |
| try: |
| if _HAVE_SIGMASK: |
| prev_sigmask = signal.pthread_sigmask(signal.SIG_BLOCK, _IGNORED_SIGNALS) |
| pid = util.spawnv_passfds(exe, args, fds_to_pass) |
| finally: |
| if prev_sigmask is not None: |
| signal.pthread_sigmask(signal.SIG_SETMASK, prev_sigmask) |
| except: |
| os.close(w) |
| raise |
| else: |
| self._fd = w |
| self._pid = pid |
| finally: |
| os.close(r) |
| |
| def _ensure_running_and_write(self, msg=None): |
| with self._lock: |
| if self._lock._recursion_count() > 1: |
| # The code below is certainly not reentrant-safe, so bail out |
| if msg is None: |
| raise self._reentrant_call_error() |
| return self._reentrant_messages.append(msg) |
| |
| if self._fd is not None: |
| # resource tracker was launched before, is it still running? |
| if msg is None: |
| to_send = b'PROBE:0:noop\n' |
| else: |
| to_send = msg |
| try: |
| self._write(to_send) |
| except OSError: |
| self._teardown_dead_process() |
| self._launch() |
| |
| msg = None # message was sent in probe |
| else: |
| self._launch() |
| |
| while True: |
| try: |
| reentrant_msg = self._reentrant_messages.popleft() |
| except IndexError: |
| break |
| self._write(reentrant_msg) |
| if msg is not None: |
| self._write(msg) |
| |
| def _check_alive(self): |
| '''Check that the pipe has not been closed by sending a probe.''' |
| try: |
| # We cannot use send here as it calls ensure_running, creating |
| # a cycle. |
| os.write(self._fd, b'PROBE:0:noop\n') |
| except OSError: |
| return False |
| else: |
| return True |
| |
| def register(self, name, rtype): |
| '''Register name of resource with resource tracker.''' |
| self._send('REGISTER', name, rtype) |
| |
| def unregister(self, name, rtype): |
| '''Unregister name of resource with resource tracker.''' |
| self._send('UNREGISTER', name, rtype) |
| |
| def _write(self, msg): |
| nbytes = os.write(self._fd, msg) |
| assert nbytes == len(msg), f"{nbytes=} != {len(msg)=}" |
| |
| def _send(self, cmd, name, rtype): |
| msg = f"{cmd}:{name}:{rtype}\n".encode("ascii") |
| if len(msg) > 512: |
| # posix guarantees that writes to a pipe of less than PIPE_BUF |
| # bytes are atomic, and that PIPE_BUF >= 512 |
| raise ValueError('msg too long') |
| |
| self._ensure_running_and_write(msg) |
| |
# The single tracker handle of this module. The module-level functions below
# are simply its bound methods.
_resource_tracker = ResourceTracker()
getfd = _resource_tracker.getfd
ensure_running = _resource_tracker.ensure_running
register = _resource_tracker.register
unregister = _resource_tracker.unregister
| |
| |
def main(fd):
    '''Run resource tracker.

    Reads ``CMD:NAME:RTYPE`` lines from the file descriptor *fd* until EOF,
    keeping a set of registered names per resource type. At EOF every name
    still registered is passed to the cleanup function of its type.

    CMD is one of REGISTER, UNREGISTER or PROBE. NAME may itself contain
    colons: the line is split on its first and its last colon.

    The process always terminates through sys.exit() with the last value
    assigned to the exit code:
      0 - nothing went wrong and nothing was left to clean up
      1 - at least one resource type still had registered names at EOF
      2 - a cleanup function raised
      3 - a message could not be processed (may be overwritten by 1 or 2)
    '''
    # protect the process from ^C and "killall python" etc
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    signal.signal(signal.SIGTERM, signal.SIG_IGN)
    if _HAVE_SIGMASK:
        signal.pthread_sigmask(signal.SIG_UNBLOCK, _IGNORED_SIGNALS)

    for f in (sys.stdin, sys.stdout):
        try:
            f.close()
        except Exception:
            pass

    cache = {rtype: set() for rtype in _CLEANUP_FUNCS.keys()}
    exit_code = 0

    try:
        # keep track of registered/unregistered resources
        with open(fd, 'rb') as f:
            for line in f:
                try:
                    text = line.strip().decode('ascii')
                    # cmd and rtype come from fixed vocabularies without
                    # colons, so any additional colon belongs to the name.
                    # Lines with fewer than two colons still fail to unpack
                    # and are reported like any other malformed message.
                    cmd, rest = text.split(':', 1)
                    name, rtype = rest.rsplit(':', 1)
                    cleanup_func = _CLEANUP_FUNCS.get(rtype, None)
                    if cleanup_func is None:
                        raise ValueError(
                            f'Cannot register {name} for automatic cleanup: '
                            f'unknown resource type {rtype}')

                    if cmd == 'REGISTER':
                        cache[rtype].add(name)
                    elif cmd == 'UNREGISTER':
                        cache[rtype].remove(name)
                    elif cmd == 'PROBE':
                        pass
                    else:
                        raise RuntimeError('unrecognized command %r' % cmd)
                except Exception:
                    # A bad message must not stop the tracker: report it and
                    # carry on with the next line.
                    exit_code = 3
                    try:
                        sys.excepthook(*sys.exc_info())
                    except:
                        pass
    finally:
        # all processes have terminated; cleanup any remaining resources
        for rtype, rtype_cache in cache.items():
            if rtype_cache:
                try:
                    exit_code = 1
                    if rtype == 'dummy':
                        # The test 'dummy' resource is expected to leak.
                        # We skip the warning (and *only* the warning) for it.
                        pass
                    else:
                        warnings.warn(
                            f'resource_tracker: There appear to be '
                            f'{len(rtype_cache)} leaked {rtype} objects to '
                            f'clean up at shutdown: {rtype_cache}'
                        )
                except Exception:
                    pass
            for name in rtype_cache:
                # For some reason the process which created and registered
                # this resource has failed to unregister it. Presumably it
                # has died. We therefore unlink it.
                try:
                    _CLEANUP_FUNCS[rtype](name)
                except Exception as e:
                    exit_code = 2
                    warnings.warn('resource_tracker: %r: %s' % (name, e))

        # Exiting from within the finally block makes sure the computed exit
        # code is what the parent observes, whatever happened above.
        sys.exit(exit_code)