lib/namespaces.py - chromiumos/chromite - Git at Google

 # Copyright 2013 The ChromiumOS Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Support for Linux namespaces"""

 import contextlib
 import ctypes
 import errno
 import logging
 import os
 import signal

 # Note: We avoid cros_build_lib here as that's a "large" module and we want
 # to keep this "light" and standalone.  The subprocess usage in here is also
 # simple by design -- if it gets more complicated, we should look at using
 # the cros_build_lib.run helper.
 import subprocess
 import sys
 from typing import List, Optional

 from chromite.lib import commandline
 from chromite.lib import locking
 from chromite.lib import osutils
 from chromite.lib import process_util
 from chromite.utils import libc
 from chromite.utils import os_util
 from chromite.utils import proctitle_util


 # CLONE_* flag bits for use with the Unshare()/SetNS() wrappers in this module.
 # NOTE(review): the values presumably mirror the CLONE_* definitions in the
 # kernel's <linux/sched.h> -- confirm against the headers before changing.
 CLONE_FILES = 0x00000400
 CLONE_FS = 0x00000200
 CLONE_NEWCGROUP = 0x02000000
 CLONE_NEWIPC = 0x08000000
 CLONE_NEWNET = 0x40000000
 CLONE_NEWNS = 0x00020000
 CLONE_NEWPID = 0x20000000
 CLONE_NEWUSER = 0x10000000
 CLONE_NEWUTS = 0x04000000


 def SetNS(fd, nstype) -> None:
     """Binding to the Linux setns system call. See setns(2) for details.

     Args:
         fd: An open file descriptor or path to one.  A path is opened
             read-only for the duration of the call and closed afterwards.
         nstype: Namespace to enter; one of CLONE_*.

     Raises:
         OSError: if the path could not be opened or if setns failed.
     """
     fp = None
     try:
         if isinstance(fd, str):
             # setns() only needs a readable handle on the namespace file.
             # Opening for writing would ask for O_CREAT|O_TRUNC, which can
             # clobber or create the target and is never wanted here.
             fp = open(fd, "rb")  # pylint: disable=consider-using-with
             fd = fp.fileno()

         if libc.GetLibc().setns(ctypes.c_int(fd), ctypes.c_int(nstype)) != 0:
             e = ctypes.get_errno()
             raise OSError(e, os.strerror(e))
     finally:
         # Only close handles we opened ourselves; caller-owned fds stay open.
         if fp is not None:
             fp.close()


 def Unshare(flags) -> None:
     """Thin wrapper around the Linux unshare system call (see unshare(2)).

     Args:
         flags: Bitwise OR of the CLONE_* flags naming what to unshare.

     Raises:
         OSError: if the underlying unshare call reports failure.
     """
     rc = libc.GetLibc().unshare(ctypes.c_int(flags))
     if rc == 0:
         return

     # Surface the C-level errno as a regular Python exception.
     err = ctypes.get_errno()
     raise OSError(err, os.strerror(err))


 def _ReapChildren(
     pid: int,
     uid: Optional[int],
     gid: Optional[int],
     stop_lock: Optional[locking.PipeLock] = None,
 ) -> None:
     """Reap all children that get reparented to us until we see |pid| exit.

     SIGSTOP handling notes: we want process reapers to handle stopped children
     by propagating SIGSTOP upwards (i.e., stopping themselves). However, PID 1
     (in the namespace) can't be stopped; instead, we use a PipeLock to
     communicate upward.

     Args:
         pid: The main child to watch for.
         uid: The user to switch to first (skipped when None).
         gid: The group to switch to first (skipped when None).
         stop_lock: The lock to post to when our child stops.  When omitted,
             this process stops itself instead.

     Raises:
         ValueError: if there are no children left to wait for and |pid| was
             never seen exiting.
         OSError: for any failure other than ECHILD or EINTR.
     """
     # The group is switched before the user, matching the original ordering.
     if gid is not None:
         os.setgid(gid)
     if uid is not None:
         os.setuid(uid)

     while True:
         try:
             # WUNTRACED makes waitpid also report children that stopped.
             (wpid, status) = os.waitpid(-1, os.WUNTRACED)
             if pid == wpid:
                 if os.WIFSTOPPED(status):
                     # Propagate the stoppage upwards one way or another.
                     if stop_lock:
                         stop_lock.Post()
                     else:
                         # In practice, we don't get here, because the only
                         # caller (the external init manager) should only have a
                         # single, unstoppable child (the init manager). But we
                         # include this for completeness.
                         # Stop *this* process: os.kill() with a process group
                         # id would signal whichever process happens to be the
                         # group leader rather than ourselves.
                         os.kill(os.getpid(), signal.SIGSTOP)
                     continue
                 # The watched child is gone; mirror its exit status.
                 process_util.ExitAsStatus(status)
         except OSError as e:
             if e.errno == errno.ECHILD:
                 raise ValueError(
                     "All children of the current processes have been reaped, "
                     f"but {pid} was not one of them. This means that {pid} "
                     "is not a child of the current processes."
                 ) from e
             elif e.errno != errno.EINTR:
                 # EINTR just means a signal interrupted us; retry the wait.
                 raise


 def _SafeTcSetPgrp(fd, pgrp) -> None:
     """Set |pgrp| as the controller of the tty |fd|."""
     try:
         curr_pgrp = os.tcgetpgrp(fd)
     except OSError as e:
         # This can come up when the fd is not connected to a terminal.
         if e.errno == errno.ENOTTY:
             return
         raise

     # We can change the owner only if currently own it.  Otherwise we'll get
     # stopped by the kernel with SIGTTOU and that'll hit the whole group.
     if curr_pgrp == os.getpgrp():
         os.tcsetpgrp(fd, pgrp)


 def _ForwardToChildPid(
     pid: int, signal_to_forward: int, group: bool = False
 ) -> None:
     """Setup a signal handler that forwards the given signal to children.

     Args:
         pid: Process to target.
         signal_to_forward: Signal number to forward.
         group: If False, forward the signal only to the |pid| in question. If
             True, forward to the entire process group that |pid| belongs to.
     """

     def _ForwardingHandler(signum, _frame) -> None:
         try:
             if group:
                 os.killpg(os.getpgid(pid), signum)
             else:
                 os.kill(pid, signum)
         except ProcessLookupError:
             # The target PID might have already exited, and thus we get a
             # ProcessLookupError when trying to send it a signal.
             logging.debug(
                 "Can't forward signal %u to pid %u as it doesn't exist",
                 signum,
                 pid,
             )

     signal.signal(signal_to_forward, _ForwardingHandler)


 def CreatePidNs(uid: Optional[int] = None, gid: Optional[int] = None) -> int:
     """Start a new pid namespace.

     This will launch all the right manager processes.  The child that returns
     will be isolated in a new pid namespace.

     If functionality is not available (unshare reports EINVAL), then it will
     return w/out doing anything, still in the calling process.

     A note about the processes generated as a result of calling this function:
     You call CreatePidNs() in pid X
     - X launches Pid Y,
       - Pid X will now do nothing but wait for Pid Y to finish and then
         sys.exit() with that return code
       - Y launches Pid Z
         - Pid Y will now do nothing but wait for Pid Z to finish and then
           sys.exit() with that return code
         - **Pid Z returns from CreatePidNs**. So, the caller of this function
           continues in a different process than the one that made the call.
             - All SIGTERM/SIGINT signals are forwarded down from pid X to pid Z
               to handle.
             - SIGKILL will only kill pid X, and leak Pid Y and Z.
             - SIGTSTP/SIGSTOP on Z will propagate out to Y (which can't stop,
               as it is PID 1) and then out to X via stop_lock.
             - SIGCONT is forwarded from X to Z.

     Args:
         uid: The user to run the init processes as.
         gid: The group to run the init processes as.

     Returns:
         The last pid outside of the namespace. (i.e., pid X)

     Raises:
         OSError: if unshare fails for a reason other than EINVAL, or if
             remounting /proc fails.
     """
     # Captured before any fork so every process reports pid X.
     first_pid = os.getpid()

     try:
         # First create the namespace.
         Unshare(CLONE_NEWPID)
     except OSError as e:
         if e.errno == errno.EINVAL:
             # For older kernels, or the functionality is disabled in the config,
             # return silently.  We don't want to hard require this stuff.
             return first_pid
         else:
             # For all other errors, abort.  They shouldn't happen.
             raise

     # Used to make sure process groups are in the right state before we try to
     # forward the controlling terminal.
     lock = locking.PipeLock()

     # The new PID 1 can't SIGSTOP itself, so we'll use this lock to notify the
     # external init when it's time to SIGSTOP.
     stop_lock = locking.PipeLock()

     # Now that we're in the new pid namespace, fork.  The parent is the master
     # of it in the original namespace, so it only monitors the child inside it.
     # It is only allowed to fork once too.
     pid = os.fork()
     if pid:
         # This branch is pid X, the "external init".
         proctitle_util.settitle("pid ns", "external init")

         # We forward termination signals to the child and trust the child to
         # respond sanely. Later, ExitAsStatus propagates the exit status back
         # up.
         # We also forward continuation signals to the entire group (in shell
         # job control fashion), so we all resume together if we're ever
         # SIGSTOP'd.
         _ForwardToChildPid(pid, signal.SIGINT)
         _ForwardToChildPid(pid, signal.SIGTERM)
         _ForwardToChildPid(pid, signal.SIGCONT, group=True)

         # Forward the control of the terminal to the child so it can manage
         # input.
         _SafeTcSetPgrp(sys.stdin.fileno(), pid)

         # Signal our child it can move forward.
         lock.Post()
         del lock

         # Wait for our child to stop (in which case they Post non-empty
         # contents) or terminate (broken / empty pipe). We only have 1 child,
         # so it's OK to defer reaping until the lock is closed.
         while len(stop_lock.Wait()) != 0:
             # Child Post()ed; that means we want to propagate SIGSTOP to the
             # group.
             os.killpg(os.getpgrp(), signal.SIGSTOP)

         # Reap the children as the parent of the new namespace.
         _ReapChildren(pid, uid=uid, gid=gid)
         # Shouldn't get here, but clean up for completeness.
         # NOTE(review): if _ReapChildren ever did return, the code after the
         # if/else would hit `del stop_lock` and `lock.Wait()` on names that
         # were already deleted in this branch and raise NameError.
         del stop_lock
     else:
         # This branch is pid Y, the first process inside the new namespace.
         # Make sure to unshare the existing mount point if needed.  Some distros
         # create shared mount points everywhere by default.
         try:
             osutils.Mount(
                 "none", "/proc", 0, osutils.MS_PRIVATE | osutils.MS_REC
             )
         except OSError as e:
             if e.errno != errno.EINVAL:
                 raise

         # The child needs its own proc mount as it'll be different.
         osutils.Mount(
             "proc",
             "/proc",
             "proc",
             osutils.MS_NOSUID
             | osutils.MS_NODEV
             | osutils.MS_NOEXEC
             | osutils.MS_RELATIME,
         )

         # Wait for our parent to finish initialization.
         lock.Wait()
         del lock

         # Resetup the locks for the next phase.
         lock = locking.PipeLock()

         pid = os.fork()
         if pid:
             # Still pid Y: it becomes the namespace's init and reaps for Z.
             proctitle_util.settitle("pid ns", "init")

             # We forward termination signals to the child and trust the child to
             # respond sanely. Later, ExitAsStatus propagates the exit status
             # back up.
             # We also forward continuation signals to the entire group (in
             # shell job control fashion), so we all resume together if we're
             # ever SIGSTOP'd.
             _ForwardToChildPid(pid, signal.SIGINT)
             _ForwardToChildPid(pid, signal.SIGTERM)
             _ForwardToChildPid(pid, signal.SIGCONT, group=True)

             # Now that we're in a new pid namespace, start a new process group
             # so that children have something valid to use.  Otherwise
             # getpgrp/etc... will get back 0 which tends to confuse -- you can't
             # setpgrp(0) for example.
             os.setpgrp()

             # Forward the control of the terminal to the child so it can manage
             # input.
             _SafeTcSetPgrp(sys.stdin.fileno(), pid)

             # Signal our child it can move forward.
             lock.Post()
             del lock

             # Watch all the children.  We need to act as the master inside the
             # namespace and reap old processes.
             _ReapChildren(pid, uid=uid, gid=gid, stop_lock=stop_lock)
             # Shouldn't get here, but clean up for completeness.
             del stop_lock

     # Only the grandchild (pid Z) is expected to reach this point.
     # Grandchild doesn't need this lock.
     del stop_lock

     # Wait for our parent to finish initialization.
     lock.Wait()
     del lock

     # Create a process group for the grandchild so it can manage things
     # independent of the init process.
     os.setpgrp()

     # The grandchild will return and take over the rest of the sdk steps.
     return first_pid


 def CreateNetNs() -> None:
     """Move the current process into a fresh network namespace.

     Only the loopback interface is brought up inside the new namespace.

     When the kernel reports the namespace type as unavailable (EINVAL), this
     returns without doing anything.
     """
     # Net namespaces arrived in 2.6.24 and can be compiled out of the kernel.
     try:
         Unshare(CLONE_NEWNET)
     except OSError as err:
         # Anything but "unsupported" is unexpected and must propagate.
         if err.errno != errno.EINVAL:
             raise
         return

     # A brand new net namespace starts with loopback down.  The kernel assigns
     # the usual addresses on its own, so bringing the link up is enough.
     bring_up_lo = ["ip", "link", "set", "up", "lo"]
     try:
         subprocess.call(bring_up_lo)
     except OSError as err:
         if err.errno != errno.ENOENT:
             raise
         print(
             "warning: could not bring up loopback for network; "
             "install the iproute2 package",
             file=sys.stderr,
         )


 def CreateUserNs(new_uid: int = 0, new_gid: int = 0) -> None:
     """Move the current process into a new user namespace.

     The call fails if the current process is multi-threaded.

     Inside the new user namespace, the current process will:
     - run with the requested UID/GID
     - hold all capabilities (within the namespace)

     This is handy for entering other namespaces (e.g. a mount namespace)
     without needing root privileges.

     Args:
         new_uid: UID that will be mapped to the UID in the original namespace.
         new_gid: GID that will be mapped to the GID in the original namespace.
     """
     # The ids must be sampled before unsharing.
     orig_uid, orig_gid = os.getuid(), os.getgid()

     Unshare(CLONE_NEWUSER)

     # Map the original UID/GID onto the requested ones.  The files have to be
     # written in exactly this order; see user_namespaces(7) for details.
     id_map_writes = (
         ("/proc/self/setgroups", "deny"),
         ("/proc/self/uid_map", f"{new_uid} {orig_uid} 1\n"),
         ("/proc/self/gid_map", f"{new_gid} {orig_gid} 1\n"),
     )
     for proc_path, payload in id_map_writes:
         with open(proc_path, "w", encoding="utf-8") as f:
             f.write(payload)


 def _UnshareIfSupported(flag: int) -> None:
     """Unshare the namespace |flag|, skipping it when it is unsupported.

     Args:
         flag: A single CLONE_NEW* flag to pass to Unshare().

     Raises:
         OSError: if unshare failed for any reason other than EINVAL.
     """
     try:
         Unshare(flag)
     except OSError as e:
         # EINVAL is treated as "namespace type not available" throughout this
         # module; anything else is a real failure the caller must see.
         if e.errno != errno.EINVAL:
             raise


 def SimpleUnshare(
     mount: bool = True,
     uts: bool = True,
     ipc: bool = True,
     net: bool = False,
     pid: bool = False,
     cgroup: bool = False,
     pid_uid: Optional[int] = None,
     pid_gid: Optional[int] = None,
 ) -> None:
     """Simpler helper for setting up namespaces quickly.

     If support for any namespace type is not available, we'll silently skip it.
     Any other failure is reported to the caller.

     Args:
         mount: Create a mount namespace.
         uts: Create a UTS namespace.
         ipc: Create an IPC namespace.
         net: Create a net namespace.
         pid: Create a pid namespace.
         cgroup: Create a cgroup namespace.
         pid_uid: The UID to switch the init to when creating a pid namespace.
         pid_gid: The GID to switch the init to when creating a pid namespace.

     Raises:
         OSError: if an unshare operation fails for a reason other than the
             namespace type being unsupported.
     """
     # The mount namespace is the only one really guaranteed to exist --
     # it's been supported forever and it cannot be turned off.
     if mount:
         Unshare(CLONE_NEWNS)

     # The UTS namespace was added 2.6.19 and may be disabled in the kernel.
     if uts:
         _UnshareIfSupported(CLONE_NEWUTS)

     # The IPC namespace was added 2.6.19 and may be disabled in the kernel.
     if ipc:
         _UnshareIfSupported(CLONE_NEWIPC)

     if net:
         CreateNetNs()

     # When the pid namespace is created, execution continues in a different
     # process from here on (see CreatePidNs).
     if pid:
         CreatePidNs(uid=pid_uid, gid=pid_gid)

     # The cgroup namespace was added in 4.6 and may be disabled in the kernel.
     if cgroup:
         _UnshareIfSupported(CLONE_NEWCGROUP)

     # We considered unsharing the time namespace as well.  Unfortunately,
     # the usefulness of time namespaces is limited:
     # - they only isolate the CLOCK_BOOTTIME and CLOCK_MONOTONIC clocks
     # - there's no way to set these clocks apart from updating the offset in the
     #   /proc/self/timens_offset file, which cannot be edited after a process
     #   has been created in the new time namespace
     # - CLOCK_REALTIME is not isolated
     # Hence we've left them out.


 def ReExecuteWithNamespace(
     argv: List[str],
     preserve_env: bool = False,
     network: bool = False,
     clear_saved_id: bool = False,
 ) -> None:
     """Re-execute as root, unshare resources, then return to the sudo user.

     Args:
         argv: Command line arguments to run as root user.
         preserve_env: If True, keep the existing environment variables when
             running as root user.
         network: If False, disable access to the network.
         clear_saved_id: Whether to clear the saved-uid & saved-gid.  See
             os_util.switch_to_sudo_user.
     """
     # Creating the namespaces is done as root, so re-run the command as root
     # first.  Ideally this would become root without re-executing.
     commandline.RunAsRootUser(argv, preserve_env=preserve_env)

     # A net namespace is only created when network access is not wanted.
     isolate_network = not network
     SimpleUnshare(net=isolate_network, pid=True)

     # The namespaces are in place, so go back to the non-root user.
     os_util.switch_to_sudo_user(clear_saved_id=clear_saved_id)


 @contextlib.contextmanager
 def use_network_sandbox():
     """Context manager that runs its body inside a network sandbox.

     On entry the process is moved into fresh namespaces without network
     connectivity; on exit it tries to re-enter the network namespace it
     started in, so that follow-up work needing the previous network state
     can run.  Failures on either side are logged rather than raised.
     """

     orig_netns = None
     with contextlib.ExitStack() as cleanup:
         # Hold a handle on the namespace we start in so that we can return to
         # it later for network-dependent operations (e.g. telemetry uploads).
         try:
             # pylint: disable=consider-using-with
             orig_netns = cleanup.enter_context(open("/proc/self/ns/net", "rb"))
             logging.debug(
                 "open %s %s",
                 orig_netns.fileno(),
                 os.readlink("/proc/self/ns/net"),
             )
         except OSError as err:
             logging.debug(
                 "failed to open file descriptor to current network namespace: "
                 "%s",
                 repr(err),
             )

         # Cut off the network to prevent leakage, then drop back to the
         # non-root user once the namespaces exist.
         try:
             SimpleUnshare(net=True, pid=True)
             os_util.switch_to_sudo_user()
         except OSError as err:
             logging.warning("an unshare(2) operation failed: %s", repr(err))

         try:
             yield
         finally:
             # Without a usable handle there is nothing to switch back to.
             if orig_netns is not None:
                 try:
                     # Regain root, then turn the network back on so the
                     # containing telemetry trace can be sent to clearcut.
                     os.setresuid(0, 0, -1)
                     os.setresgid(0, 0, -1)
                     SetNS(orig_netns.fileno(), CLONE_NEWNET)
                 except OSError as err:
                     logging.warning(
                         "Trying to re-enter original network namespace failed: "
                         "%s",
                         repr(err),
                     )
	# Copyright 2013 The ChromiumOS Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Support for Linux namespaces"""

	import contextlib
	import ctypes
	import errno
	import logging
	import os
	import signal

	# Note: We avoid cros_build_lib here as that's a "large" module and we want
	# to keep this "light" and standalone. The subprocess usage in here is also
	# simple by design -- if it gets more complicated, we should look at using
	# the cros_build_lib.run helper.
	import subprocess
	import sys
	from typing import List, Optional

	from chromite.lib import commandline
	from chromite.lib import locking
	from chromite.lib import osutils
	from chromite.lib import process_util
	from chromite.utils import libc
	from chromite.utils import os_util
	from chromite.utils import proctitle_util


	# CLONE_* flag bits for use with the Unshare()/SetNS() wrappers in this module.
	# NOTE(review): the values presumably mirror the CLONE_* definitions in the
	# kernel's <linux/sched.h> -- confirm against the headers before changing.
	CLONE_FILES = 0x00000400
	CLONE_FS = 0x00000200
	CLONE_NEWCGROUP = 0x02000000
	CLONE_NEWIPC = 0x08000000
	CLONE_NEWNET = 0x40000000
	CLONE_NEWNS = 0x00020000
	CLONE_NEWPID = 0x20000000
	CLONE_NEWUSER = 0x10000000
	CLONE_NEWUTS = 0x04000000


	def SetNS(fd, nstype) -> None:
	    """Binding to the Linux setns system call. See setns(2) for details.

	    Args:
	        fd: An open file descriptor or path to one.  A path is opened
	            read-only for the duration of the call and closed afterwards.
	        nstype: Namespace to enter; one of CLONE_*.

	    Raises:
	        OSError: if the path could not be opened or if setns failed.
	    """
	    fp = None
	    try:
	        if isinstance(fd, str):
	            # setns() only needs a readable handle on the namespace file.
	            # Opening for writing would ask for O_CREAT|O_TRUNC, which can
	            # clobber or create the target and is never wanted here.
	            fp = open(fd, "rb")  # pylint: disable=consider-using-with
	            fd = fp.fileno()

	        if libc.GetLibc().setns(ctypes.c_int(fd), ctypes.c_int(nstype)) != 0:
	            e = ctypes.get_errno()
	            raise OSError(e, os.strerror(e))
	    finally:
	        # Only close handles we opened ourselves; caller-owned fds stay open.
	        if fp is not None:
	            fp.close()


	def Unshare(flags) -> None:
	    """Binding to the Linux unshare system call. See unshare(2) for details.

	    Args:
	        flags: Namespaces to unshare; bitwise OR of CLONE_* flags.

	    Raises:
	        OSError: if unshare failed.
	    """
	    if libc.GetLibc().unshare(ctypes.c_int(flags)) != 0:
	        # Surface the C-level errno as a regular Python exception.
	        e = ctypes.get_errno()
	        raise OSError(e, os.strerror(e))


	def _ReapChildren(
	    pid: int,
	    uid: Optional[int],
	    gid: Optional[int],
	    stop_lock: Optional[locking.PipeLock] = None,
	) -> None:
	    """Reap all children that get reparented to us until we see |pid| exit.

	    SIGSTOP handling notes: we want process reapers to handle stopped children
	    by propagating SIGSTOP upwards (i.e., stopping themselves). However, PID 1
	    (in the namespace) can't be stopped; instead, we use a PipeLock to
	    communicate upward.

	    Args:
	        pid: The main child to watch for.
	        uid: The user to switch to first (skipped when None).
	        gid: The group to switch to first (skipped when None).
	        stop_lock: The lock to post to when our child stops.  When omitted,
	            this process stops itself instead.

	    Raises:
	        ValueError: if there are no children left to wait for and |pid| was
	            never seen exiting.
	        OSError: for any failure other than ECHILD or EINTR.
	    """
	    # The group is switched before the user, matching the original ordering.
	    if gid is not None:
	        os.setgid(gid)
	    if uid is not None:
	        os.setuid(uid)

	    while True:
	        try:
	            # WUNTRACED makes waitpid also report children that stopped.
	            (wpid, status) = os.waitpid(-1, os.WUNTRACED)
	            if pid == wpid:
	                if os.WIFSTOPPED(status):
	                    # Propagate the stoppage upwards one way or another.
	                    if stop_lock:
	                        stop_lock.Post()
	                    else:
	                        # In practice, we don't get here, because the only
	                        # caller (the external init manager) should only have a
	                        # single, unstoppable child (the init manager). But we
	                        # include this for completeness.
	                        # Stop *this* process: os.kill() with a process group
	                        # id would signal whichever process happens to be the
	                        # group leader rather than ourselves.
	                        os.kill(os.getpid(), signal.SIGSTOP)
	                    continue
	                # The watched child is gone; mirror its exit status.
	                process_util.ExitAsStatus(status)
	        except OSError as e:
	            if e.errno == errno.ECHILD:
	                raise ValueError(
	                    "All children of the current processes have been reaped, "
	                    f"but {pid} was not one of them. This means that {pid} "
	                    "is not a child of the current processes."
	                ) from e
	            elif e.errno != errno.EINTR:
	                # EINTR just means a signal interrupted us; retry the wait.
	                raise


	def _SafeTcSetPgrp(fd, pgrp) -> None:
	"""Set \|pgrp\| as the controller of the tty \|fd\|."""
	try:
	curr_pgrp = os.tcgetpgrp(fd)
	except OSError as e:
	# This can come up when the fd is not connected to a terminal.
	if e.errno == errno.ENOTTY:
	return
	raise

	# We can change the owner only if currently own it. Otherwise we'll get
	# stopped by the kernel with SIGTTOU and that'll hit the whole group.
	if curr_pgrp == os.getpgrp():
	os.tcsetpgrp(fd, pgrp)


	def _ForwardToChildPid(
	pid: int, signal_to_forward: int, group: bool = False
	) -> None:
	"""Setup a signal handler that forwards the given signal to children.

	Args:
	pid: Process to target.
	signal_to_forward: Signal number to forward.
	group: If False, forward the signal only to the \|pid\| in question. If
	True, forward to the entire process group that \|pid\| belongs to.
	"""

	def _ForwardingHandler(signum, _frame) -> None:
	try:
	if group:
	os.killpg(os.getpgid(pid), signum)
	else:
	os.kill(pid, signum)
	except ProcessLookupError:
	# The target PID might have already exited, and thus we get a
	# ProcessLookupError when trying to send it a signal.
	logging.debug(
	"Can't forward signal %u to pid %u as it doesn't exist",
	signum,
	pid,
	)

	signal.signal(signal_to_forward, _ForwardingHandler)


	def CreatePidNs(uid: Optional[int] = None, gid: Optional[int] = None) -> int:
	    """Start a new pid namespace.

	    This will launch all the right manager processes.  The child that returns
	    will be isolated in a new pid namespace.

	    If functionality is not available (unshare reports EINVAL), then it will
	    return w/out doing anything, still in the calling process.

	    A note about the processes generated as a result of calling this function:
	    You call CreatePidNs() in pid X
	    - X launches Pid Y,
	      - Pid X will now do nothing but wait for Pid Y to finish and then
	        sys.exit() with that return code
	      - Y launches Pid Z
	        - Pid Y will now do nothing but wait for Pid Z to finish and then
	          sys.exit() with that return code
	        - **Pid Z returns from CreatePidNs**. So, the caller of this function
	          continues in a different process than the one that made the call.
	            - All SIGTERM/SIGINT signals are forwarded down from pid X to pid Z
	              to handle.
	            - SIGKILL will only kill pid X, and leak Pid Y and Z.
	            - SIGTSTP/SIGSTOP on Z will propagate out to Y (which can't stop,
	              as it is PID 1) and then out to X via stop_lock.
	            - SIGCONT is forwarded from X to Z.

	    Args:
	        uid: The user to run the init processes as.
	        gid: The group to run the init processes as.

	    Returns:
	        The last pid outside of the namespace. (i.e., pid X)

	    Raises:
	        OSError: if unshare fails for a reason other than EINVAL, or if
	            remounting /proc fails.
	    """
	    # Captured before any fork so every process reports pid X.
	    first_pid = os.getpid()

	    try:
	        # First create the namespace.
	        Unshare(CLONE_NEWPID)
	    except OSError as e:
	        if e.errno == errno.EINVAL:
	            # For older kernels, or the functionality is disabled in the config,
	            # return silently.  We don't want to hard require this stuff.
	            return first_pid
	        else:
	            # For all other errors, abort.  They shouldn't happen.
	            raise

	    # Used to make sure process groups are in the right state before we try to
	    # forward the controlling terminal.
	    lock = locking.PipeLock()

	    # The new PID 1 can't SIGSTOP itself, so we'll use this lock to notify the
	    # external init when it's time to SIGSTOP.
	    stop_lock = locking.PipeLock()

	    # Now that we're in the new pid namespace, fork.  The parent is the master
	    # of it in the original namespace, so it only monitors the child inside it.
	    # It is only allowed to fork once too.
	    pid = os.fork()
	    if pid:
	        # This branch is pid X, the "external init".
	        proctitle_util.settitle("pid ns", "external init")

	        # We forward termination signals to the child and trust the child to
	        # respond sanely. Later, ExitAsStatus propagates the exit status back
	        # up.
	        # We also forward continuation signals to the entire group (in shell
	        # job control fashion), so we all resume together if we're ever
	        # SIGSTOP'd.
	        _ForwardToChildPid(pid, signal.SIGINT)
	        _ForwardToChildPid(pid, signal.SIGTERM)
	        _ForwardToChildPid(pid, signal.SIGCONT, group=True)

	        # Forward the control of the terminal to the child so it can manage
	        # input.
	        _SafeTcSetPgrp(sys.stdin.fileno(), pid)

	        # Signal our child it can move forward.
	        lock.Post()
	        del lock

	        # Wait for our child to stop (in which case they Post non-empty
	        # contents) or terminate (broken / empty pipe). We only have 1 child,
	        # so it's OK to defer reaping until the lock is closed.
	        while len(stop_lock.Wait()) != 0:
	            # Child Post()ed; that means we want to propagate SIGSTOP to the
	            # group.
	            os.killpg(os.getpgrp(), signal.SIGSTOP)

	        # Reap the children as the parent of the new namespace.
	        _ReapChildren(pid, uid=uid, gid=gid)
	        # Shouldn't get here, but clean up for completeness.
	        # NOTE(review): if _ReapChildren ever did return, the code after the
	        # if/else would hit `del stop_lock` and `lock.Wait()` on names that
	        # were already deleted in this branch and raise NameError.
	        del stop_lock
	    else:
	        # This branch is pid Y, the first process inside the new namespace.
	        # Make sure to unshare the existing mount point if needed.  Some distros
	        # create shared mount points everywhere by default.
	        try:
	            osutils.Mount(
	                "none", "/proc", 0, osutils.MS_PRIVATE | osutils.MS_REC
	            )
	        except OSError as e:
	            if e.errno != errno.EINVAL:
	                raise

	        # The child needs its own proc mount as it'll be different.
	        osutils.Mount(
	            "proc",
	            "/proc",
	            "proc",
	            osutils.MS_NOSUID
	            | osutils.MS_NODEV
	            | osutils.MS_NOEXEC
	            | osutils.MS_RELATIME,
	        )

	        # Wait for our parent to finish initialization.
	        lock.Wait()
	        del lock

	        # Resetup the locks for the next phase.
	        lock = locking.PipeLock()

	        pid = os.fork()
	        if pid:
	            # Still pid Y: it becomes the namespace's init and reaps for Z.
	            proctitle_util.settitle("pid ns", "init")

	            # We forward termination signals to the child and trust the child to
	            # respond sanely. Later, ExitAsStatus propagates the exit status
	            # back up.
	            # We also forward continuation signals to the entire group (in
	            # shell job control fashion), so we all resume together if we're
	            # ever SIGSTOP'd.
	            _ForwardToChildPid(pid, signal.SIGINT)
	            _ForwardToChildPid(pid, signal.SIGTERM)
	            _ForwardToChildPid(pid, signal.SIGCONT, group=True)

	            # Now that we're in a new pid namespace, start a new process group
	            # so that children have something valid to use.  Otherwise
	            # getpgrp/etc... will get back 0 which tends to confuse -- you can't
	            # setpgrp(0) for example.
	            os.setpgrp()

	            # Forward the control of the terminal to the child so it can manage
	            # input.
	            _SafeTcSetPgrp(sys.stdin.fileno(), pid)

	            # Signal our child it can move forward.
	            lock.Post()
	            del lock

	            # Watch all the children.  We need to act as the master inside the
	            # namespace and reap old processes.
	            _ReapChildren(pid, uid=uid, gid=gid, stop_lock=stop_lock)
	            # Shouldn't get here, but clean up for completeness.
	            del stop_lock

	    # Only the grandchild (pid Z) is expected to reach this point.
	    # Grandchild doesn't need this lock.
	    del stop_lock

	    # Wait for our parent to finish initialization.
	    lock.Wait()
	    del lock

	    # Create a process group for the grandchild so it can manage things
	    # independent of the init process.
	    os.setpgrp()

	    # The grandchild will return and take over the rest of the sdk steps.
	    return first_pid


	def CreateNetNs() -> None:
	    """Start a new net namespace

	    We will bring up the loopback interface, but that is all.

	    If functionality is not available, then it will return w/out doing anything.
	    """
	    # The net namespace was added in 2.6.24 and may be disabled in the kernel.
	    try:
	        Unshare(CLONE_NEWNET)
	    except OSError as e:
	        if e.errno == errno.EINVAL:
	            return
	        else:
	            # For all other errors, abort.  They shouldn't happen.
	            raise

	    # Since we've unshared the net namespace, we need to bring up loopback.
	    # The kernel automatically adds the various ip addresses, so skip that.
	    try:
	        subprocess.call(["ip", "link", "set", "up", "lo"])
	    except OSError as e:
	        if e.errno == errno.ENOENT:
	            print(
	                "warning: could not bring up loopback for network; "
	                "install the iproute2 package",
	                file=sys.stderr,
	            )
	        else:
	            raise


	def CreateUserNs(new_uid: int = 0, new_gid: int = 0) -> None:
	    """Start a user namespace

	    This will create a new user namespace and move the current process into it.
	    It will fail if the current process is multi-threaded.

	    In the new user namespace, the current process will:
	    - have specified new UID/GID
	    - have all capabilities (with the namespace)

	    This function is useful when you want to enter other namespaces (e.g. mount
	    namespace) without root privileges.

	    Args:
	        new_uid: UID that will be mapped to the UID in the original namespace.
	        new_gid: GID that will be mapped to the GID in the original namespace.
	    """
	    # The ids must be sampled before unsharing.
	    orig_uid = os.getuid()
	    orig_gid = os.getgid()

	    Unshare(CLONE_NEWUSER)

	    # Set up a UID/GID mapping that maps the original UID/GID to the requested
	    # UID and GID in the new user namespace. The order of writing these files
	    # matters.
	    # See user_namespaces(7) for details.
	    with open("/proc/self/setgroups", "w", encoding="utf-8") as f:
	        f.write("deny")
	    with open("/proc/self/uid_map", "w", encoding="utf-8") as f:
	        f.write(f"{new_uid} {orig_uid} 1\n")
	    with open("/proc/self/gid_map", "w", encoding="utf-8") as f:
	        f.write(f"{new_gid} {orig_gid} 1\n")


	def _UnshareIfSupported(flag: int) -> None:
	    """Unshare one namespace type, skipping it when it is unavailable.

	    Args:
	        flag: The single CLONE_NEW* flag to hand to Unshare.

	    Raises:
	        OSError: if unsharing fails for any reason other than EINVAL.
	    """
	    try:
	        Unshare(flag)
	    except OSError as e:
	        # EINVAL means the namespace type is unknown to, or disabled in, the
	        # running kernel; that is the only failure we skip silently.
	        if e.errno != errno.EINVAL:
	            raise


	def SimpleUnshare(
	    mount: bool = True,
	    uts: bool = True,
	    ipc: bool = True,
	    net: bool = False,
	    pid: bool = False,
	    cgroup: bool = False,
	    pid_uid: Optional[int] = None,
	    pid_gid: Optional[int] = None,
	) -> None:
	    """Simpler helper for setting up namespaces quickly.

	    If support for an optional namespace type is not available (the unshare
	    is rejected with EINVAL), it is silently skipped. Any other failure is
	    raised to the caller, matching what CreateNetNs does.

	    Namespaces are created in a fixed order: mount, UTS, IPC, net, pid,
	    cgroup.

	    Args:
	        mount: Create a mount namespace.
	        uts: Create a UTS namespace.
	        ipc: Create an IPC namespace.
	        net: Create a net namespace.
	        pid: Create a pid namespace.
	        cgroup: Create a cgroup namespace.
	        pid_uid: The UID to switch the init to when creating a pid namespace.
	        pid_gid: The GID to switch the init to when creating a pid namespace.

	    Raises:
	        OSError: if creating a requested namespace fails for a reason other
	            than the namespace type being unsupported.
	    """
	    # The mount namespace is the only one really guaranteed to exist --
	    # it's been supported forever and it cannot be turned off. Hence no
	    # EINVAL tolerance here: any failure is a real error.
	    if mount:
	        Unshare(CLONE_NEWNS)

	    # The UTS namespace was added 2.6.19 and may be disabled in the kernel.
	    if uts:
	        _UnshareIfSupported(CLONE_NEWUTS)

	    # The IPC namespace was added 2.6.19 and may be disabled in the kernel.
	    if ipc:
	        _UnshareIfSupported(CLONE_NEWIPC)

	    if net:
	        CreateNetNs()

	    if pid:
	        CreatePidNs(uid=pid_uid, gid=pid_gid)

	    # The cgroup namespace was added in 4.6 and may be disabled in the
	    # kernel.
	    if cgroup:
	        _UnshareIfSupported(CLONE_NEWCGROUP)

	    # We considered unsharing the time namespace as well. Unfortunately,
	    # the usefulness of time namespaces is limited:
	    # - they only isolate the CLOCK_BOOTTIME and CLOCK_MONOTONIC clocks
	    # - there's no way to set these clocks apart from updating the offset in
	    #   the /proc/self/timens_offset file, which cannot be edited after a
	    #   process has been created in the new time namespace
	    # - CLOCK_REALTIME is not isolated
	    # Hence we've left them out.


	def ReExecuteWithNamespace(
	    argv: List[str],
	    preserve_env: bool = False,
	    network: bool = False,
	    clear_saved_id: bool = False,
	) -> None:
	    """Re-run the command as root, unshare namespaces, then drop root.

	    Args:
	        argv: Command line arguments to run as root user.
	        preserve_env: If True, keep the existing environment variables when
	            running as root user.
	        network: If False, a new net namespace is requested so the network is
	            not reachable; if True, the current one is kept.
	        clear_saved_id: Whether to clear the saved-uid & saved-gid. Passed
	            through to os_util.switch_to_sudo_user.
	    """
	    # Namespaces can only be created as root, so re-run the command as the
	    # root user first. Ideally this would swap to root without having to
	    # re-execute the command.
	    commandline.RunAsRootUser(argv, preserve_env=preserve_env)

	    # A pid namespace is always requested; a net namespace only when the
	    # caller did not ask to keep network access.
	    isolate_network = not network
	    SimpleUnshare(net=isolate_network, pid=True)

	    # The namespaces are in place, so go back to the non-root user.
	    os_util.switch_to_sudo_user(clear_saved_id=clear_saved_id)


	@contextlib.contextmanager
	def use_network_sandbox():
	    """Context manager that runs its body in a network-less namespace.

	    On entry, a handle to the current network namespace is saved, new net and
	    pid namespaces are requested, and the process switches to the non-root
	    user. On exit, if the handle was obtained, the process regains uid/gid 0
	    and re-enters the saved network namespace so that tasks needing the
	    previous network state can run.

	    OSError failures in any of these steps are logged and do not propagate;
	    the body still runs.
	    """
	    ns_path = "/proc/self/ns/net"
	    orig_netns = None
	    with contextlib.ExitStack() as cleanup:
	        try:
	            # Keep an open handle on the working network namespace so we can
	            # return to it later for network-dependent operations (e.g.
	            # telemetry uploads). The ExitStack closes it when we are done.
	            # pylint: disable=consider-using-with
	            orig_netns = cleanup.enter_context(open(ns_path, "rb"))
	            logging.debug(
	                "open %s %s",
	                orig_netns.fileno(),
	                os.readlink(ns_path),
	            )
	        except OSError as err:
	            logging.debug(
	                "failed to open file descriptor to current network namespace: "
	                "%s",
	                repr(err),
	            )

	        try:
	            # Run with the network disabled to prevent leakage.
	            SimpleUnshare(net=True, pid=True)
	            # Namespaces are set up, so drop back to the non-root user.
	            os_util.switch_to_sudo_user()
	        except OSError as err:
	            logging.warning("an unshare(2) operation failed: %s", repr(err))

	        try:
	            yield
	        finally:
	            # Without a usable handle on the original network namespace there
	            # is nothing to switch back to, so skip SetNS entirely.
	            if orig_netns is not None:
	                try:
	                    # Restore network access so the enclosing telemetry trace
	                    # can be sent to clearcut. Real and effective ids go back
	                    # to 0 first; the saved ids are left unchanged (-1).
	                    os.setresuid(0, 0, -1)
	                    os.setresgid(0, 0, -1)
	                    SetNS(orig_netns.fileno(), CLONE_NEWNET)
	                except OSError as err:
	                    logging.warning(
	                        "Trying to re-enter original network namespace failed: "
	                        "%s",
	                        repr(err),
	                    )