// sandbox.cc - external/seccompsandbox - Git at Google

 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "sandbox_impl.h"

 #include "library.h"
 #include "syscall_entrypoint.h"
 #include "system_call_table.h"

 namespace playground {

 // Global variables
 // File descriptor for /proc/self/maps; -1 while no descriptor is held. It is
 // set through setProcSelfMaps() or opened on demand by startSandbox().
 int                                 Sandbox::proc_self_maps_ = -1;
 // Cached sandbox state: STATUS_UNKNOWN until supportsSeccompSandbox() has
 // probed the system or startSandbox() has enabled the sandbox.
 enum Sandbox::SandboxStatus         Sandbox::status_ = STATUS_UNKNOWN;
 // Process id, recorded once by startSandbox().
 int                                 Sandbox::pid_;
 // processFdPub_ and cloneFdPub_ each hold one end of the two socketpairs that
 // startSandbox() creates.
 int                                 Sandbox::processFdPub_;
 int                                 Sandbox::cloneFdPub_
   // This is necessary to locate the symbol from assembly code on
   // x86-64 (with %rip-relative addressing) in order for this to work
   // in relocatable code (a .so or a PIE).  On i386 this is not
   // necessary but it does not hurt.
   __attribute__((visibility("internal")));
 // Receives the previous SIGSEGV disposition when setupSignalHandlers()
 // installs the sandbox's own handler.
 Sandbox::SysCalls::kernel_sigaction Sandbox::sa_segv_;
 // NOTE(review): the next two objects are only defined here; how they are
 // populated is not visible in this file.
 Sandbox::ProtectedMap               Sandbox::protectedMap_;
 std::vector<SecureMem::Args*>       Sandbox::secureMemPool_;
 // Hook that startSandbox() calls to create the trusted thread; defaults to
 // Sandbox::createTrustedThread.
 CreateTrustedThreadFunc             g_create_trusted_thread =
   Sandbox::createTrustedThread;

 bool Sandbox::sendFd(int transport, int fd0, int fd1, const void* buf,
                      size_t len) {
   int fds[2], count                     = 0;
   if (fd0 >= 0) { fds[count++]          = fd0; }
   if (fd1 >= 0) { fds[count++]          = fd1; }
   if (!count) {
     return false;
   }
   char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
   memset(cmsg_buf, 0, sizeof(cmsg_buf));
   struct SysCalls::kernel_iovec  iov[2] = { { 0 } };
   struct SysCalls::kernel_msghdr msg    = { 0 };
   int dummy                             = 0;
   iov[0].iov_base                       = &dummy;
   iov[0].iov_len                        = sizeof(dummy);
   if (buf && len > 0) {
     iov[1].iov_base                     = const_cast<void *>(buf);
     iov[1].iov_len                      = len;
   }
   msg.msg_iov                           = iov;
   msg.msg_iovlen                        = (buf && len > 0) ? 2 : 1;
   msg.msg_control                       = cmsg_buf;
   msg.msg_controllen                    = CMSG_LEN(count*sizeof(int));
   struct cmsghdr *cmsg                  = CMSG_FIRSTHDR(&msg);
   cmsg->cmsg_level                      = SOL_SOCKET;
   cmsg->cmsg_type                       = SCM_RIGHTS;
   cmsg->cmsg_len                        = CMSG_LEN(count*sizeof(int));
   memcpy(CMSG_DATA(cmsg), fds, count*sizeof(int));
   SysCalls sys;
   return NOINTR_SYS(sys.sendmsg(transport, &msg, 0)) ==
       (ssize_t)(sizeof(dummy) + ((buf && len > 0) ? len : 0));
 }

 bool Sandbox::getFd(int transport, int* fd0, int* fd1, void* buf, size_t*len) {
   int count                            = 0;
   int *err                             = NULL;
   if (fd0) {
     count++;
     err                                = fd0;
     *fd0                               = -1;
   }
   if (fd1) {
     if (!count++) {
       err                              = fd1;
     }
     *fd1                               = -1;
   }
   if (!count) {
     return false;
   }
   char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
   memset(cmsg_buf, 0, sizeof(cmsg_buf));
   struct SysCalls::kernel_iovec iov[2] = { { 0 } };
   struct SysCalls::kernel_msghdr msg   = { 0 };
   iov[0].iov_base                      = err;
   iov[0].iov_len                       = sizeof(int);
   if (buf && len && *len > 0) {
     iov[1].iov_base                    = buf;
     iov[1].iov_len                     = *len;
   }
   msg.msg_iov                          = iov;
   msg.msg_iovlen                       = (buf && len && *len > 0) ? 2 : 1;
   msg.msg_control                      = cmsg_buf;
   msg.msg_controllen                   = CMSG_LEN(count*sizeof(int));
   SysCalls sys;
   ssize_t bytes = NOINTR_SYS(sys.recvmsg(transport, &msg, 0));
   if (len) {
     *len                               = bytes > (int)sizeof(int) ?
                                            bytes - sizeof(int) : 0;
   }
   if (bytes != (ssize_t)(sizeof(int) + ((buf && len && *len > 0) ? *len : 0))){
     *err                               = bytes >= 0 ? 0 : -EBADF;
     return false;
   }
   if (*err) {
     // "err" is the first four bytes of the payload. If these are non-zero,
     // the sender on the other side of the socketpair sent us an errno value.
     // We don't expect to get any file handles in this case.
     return false;
   }
   struct cmsghdr *cmsg               = CMSG_FIRSTHDR(&msg);
   if ((msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) ||
       !cmsg                                    ||
       cmsg->cmsg_level != SOL_SOCKET           ||
       cmsg->cmsg_type  != SCM_RIGHTS           ||
       cmsg->cmsg_len   != CMSG_LEN(count*sizeof(int))) {
     *err                             = -EBADF;
     return false;
   }
   if (fd1) { *fd1 = ((int *)CMSG_DATA(cmsg))[--count]; }
   if (fd0) { *fd0 = ((int *)CMSG_DATA(cmsg))[--count]; }
   return true;
 }

 // Forward declaration of the SIGSEGV handler that
 // Sandbox::setupSignalHandlers() installs. The asm label fixes the
 // linker-level symbol name to "playground$segvSignalHandler".
 // NOTE(review): neither the definition nor the INTERNAL macro is visible in
 // this file; INTERNAL presumably restricts symbol visibility -- confirm.
 void segvSignalHandler(int signo, Sandbox::SysCalls::siginfo *context,
                        void *unused)
   asm("playground$segvSignalHandler") INTERNAL;

 void Sandbox::setupSignalHandlers() {
   // Set SIGCHLD to SIG_DFL so that waitpid() can work
   SysCalls sys;
   struct SysCalls::kernel_sigaction sa;
   memset(&sa, 0, sizeof(sa));
   sa.sa_handler_ = SIG_DFL;
   sys.sigaction(SIGCHLD, &sa, NULL);

   // Set up SEGV handler for dealing with RDTSC instructions, system calls
   // that have been rewritten to use INT0, for sigprocmask() emulation, for
   // the creation of threads, and for user-provided SEGV handlers.
   sa.sa_sigaction_ = segvSignalHandler;
   sa.sa_flags      = SA_SIGINFO | SA_NODEFER;
   sys.sigaction(SIGSEGV, &sa, &sa_segv_);

   // Unblock SIGSEGV and SIGCHLD
   SysCalls::kernel_sigset_t mask;
   memset(&mask, 0x00, sizeof(mask));
   mask.sig[0] |= (1 << (SIGSEGV - 1)) | (1 << (SIGCHLD - 1));
   sys.sigprocmask(SIG_UNBLOCK, &mask, 0);
 }

 // Stamps "request" with the system call number and the current cookie,
 // writes "size" bytes of it to processFdPub(), and then reads a long result
 // back from threadFdPub(). Terminates via die() if either transfer is
 // short. Returns the result that was read.
 long Sandbox::forwardSyscall(int sysnum, struct RequestHeader* request,
                              int size) {
   SysCalls sys;
   request->sysnum = sysnum;
   request->cookie = cookie();

   const bool sent = write(sys, processFdPub(), request, size) == size;
   if (!sent) {
     die("Failed to send forwarded request");
   }

   long result;
   const bool received =
       read(sys, threadFdPub(), &result, sizeof(result)) == sizeof(result);
   if (!received) {
     die("Failed to receive forwarded result");
   }
   return result;
 }

 // Returns the SecureMem::Args pointer that is stored at a fixed negative
 // offset from a segment register (%gs on x86-64, %fs on i386). Compilation
 // fails on any other architecture.
 SecureMem::Args* Sandbox::getSecureMem() {
   // Check trusted_thread.cc for the magic offset that gets us from the TLS
   // to the beginning of the secure memory area.
   SecureMem::Args* ret;
 #if defined(__x86_64__)
   // Load the 64-bit pointer found at %gs:-0xE0 into "ret".
   asm volatile(
     "movq %%gs:-0xE0, %0\n"
     : "=q"(ret));
 #elif defined(__i386__)
   // Load the 32-bit pointer found at %fs:-0x58 into "ret".
   asm volatile(
     "movl %%fs:-0x58, %0\n"
     : "=r"(ret));
 #else
 #error Unsupported target platform
 #endif
   return ret;
 }

 // Rewinds "proc_self_maps", passes that descriptor to the peer on
 // "processFd" via sendFd(), and then waits for a single int in reply.
 // Any failure along the way ends in die(), which is expected not to return.
 void Sandbox::snapshotMemoryMappings(int processFd, int proc_self_maps) {
   SysCalls sys;
   const bool handed_over =
       sys.lseek(proc_self_maps, 0, SEEK_SET) == 0 &&
       sendFd(processFd, proc_self_maps, -1, NULL, 0);
   if (handed_over) {
     // The reply's value is irrelevant; only its arrival matters.
     int ack;
     if (read(sys, processFd, &ack, sizeof(ack)) == sizeof(ack)) {
       return;
     }
   }
   die("Cannot access /proc/self/maps");
 }

 // Probes whether the seccomp sandbox can be started on this system.
 //   proc_fd: if >= 0, a descriptor relative to which "self/maps" is opened
 //            in the probe child; otherwise startSandbox() opens
 //            /proc/self/maps itself.
 // Returns non-zero if the sandbox is supported, 0 otherwise. The answer is
 // cached in status_, so the probe runs at most once.
 //
 // The probe forks a child that starts the sandbox and then writes one byte
 // to a pipe; the parent treats the arrival of that byte as success.
 int Sandbox::supportsSeccompSandbox(int proc_fd) {
   if (status_ != STATUS_UNKNOWN) {
     return status_ != STATUS_UNSUPPORTED;
   }
   int fds[2];
   SysCalls sys;
   if (sys.pipe(fds)) {
     status_ = STATUS_UNSUPPORTED;
     return 0;
   }
   pid_t pid;
   switch ((pid = sys.fork())) {
     case -1:
       // fork() failed: release both pipe ends so they do not leak.
       (void)NOINTR_SYS(sys.close(fds[0]));
       (void)NOINTR_SYS(sys.close(fds[1]));
       status_ = STATUS_UNSUPPORTED;
       return 0;
     case 0: {
       // Child: redirect stdin/stdout/stderr to /dev/null (best effort).
       int devnull = sys.open("/dev/null", O_RDWR, 0);
       if (devnull >= 0) {
         sys.dup2(devnull, 0);
         sys.dup2(devnull, 1);
         sys.dup2(devnull, 2);
         sys.close(devnull);
       }
       if (proc_fd >= 0) {
         setProcSelfMaps(sys.openat(proc_fd, "self/maps", O_RDONLY, 0));
       }
       startSandbox();
       // Signal the parent that the sandbox came up.
       write(sys, fds[1], "", 1);

       // Try to tell the trusted thread to shut down the entire process in an
       // orderly fashion
       defaultSystemCallHandler(__NR_exit_group, 0, 0, 0, 0, 0, 0);

       // If that did not work (e.g. because the kernel does not know about the
       // exit_group() system call), make a direct _exit() system call instead.
       // This system call is unrestricted in seccomp mode, so it will always
       // succeed. Normally, we don't like it, because unlike exit_group() it
       // does not terminate any other thread. But since we know that
       // exit_group() exists in all kernels which support kernel-level
       // threads, this is OK: we only get here for old kernels where _exit()
       // is sufficient.
       sys._exit(0);
     }
     default:
       // Parent: close the write end so that read() reports end-of-file if
       // the child dies without writing its byte.
       (void)NOINTR_SYS(sys.close(fds[1]));
       char ch;
       if (read(sys, fds[0], &ch, 1) != 1) {
         status_ = STATUS_UNSUPPORTED;
       } else {
         status_ = STATUS_AVAILABLE;
       }
       int rc;
       (void)NOINTR_SYS(sys.waitpid(pid, &rc, 0));
       (void)NOINTR_SYS(sys.close(fds[0]));
       return status_ != STATUS_UNSUPPORTED;
   }
 }

 // Records "proc_self_maps" as the descriptor that startSandbox() should use
 // for reading the memory mappings. A negative value makes startSandbox()
 // open "/proc/self/maps" itself.
 void Sandbox::setProcSelfMaps(int proc_self_maps) {
   Sandbox::proc_self_maps_ = proc_self_maps;
 }

 // Turns on the sandbox for the current process. Does nothing if the sandbox
 // is already enabled and terminates via die() if it is known to be
 // unsupported. Otherwise it, in order: obtains /proc/self/maps, installs
 // signal handlers, initializes the system call table, creates the trusted
 // process, patches system calls in the VDSO and in well-known libraries,
 // snapshots the memory mappings, creates the trusted thread, and installs
 // the system call entry point. On return status_ is STATUS_ENABLED.
 void Sandbox::startSandbox() {
   if (status_ == STATUS_UNSUPPORTED) {
     die("The seccomp sandbox is not supported on this computer");
   } else if (status_ == STATUS_ENABLED) {
     return;
   }

   SysCalls sys;
   if (proc_self_maps_ < 0) {
     proc_self_maps_        = sys.open("/proc/self/maps", O_RDONLY, 0);
     if (proc_self_maps_ < 0) {
       die("Cannot access \"/proc/self/maps\"");
     }
   }

   // The pid is unchanged for the entire program, so we can retrieve it once
   // and store it in a global variable.
   pid_                     = sys.getpid();

   // Reset SIGCHLD to its default action, install the SIGSEGV handler, and
   // unblock both signals.
   setupSignalHandlers();

   // Set up the system call policy
   SyscallTable::initializeSyscallTable();

   // Get socketpairs for talking to the trusted process
   int pair[4];
   if (sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair) ||
       sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair+2)) {
     die("Failed to create trusted thread");
   }
   processFdPub_            = pair[0];
   cloneFdPub_              = pair[2];
   SecureMemArgs* secureMem = createTrustedProcess(pair[0], pair[1],
                                                   pair[2], pair[3]);

   // We find all libraries that have system calls and redirect the system
   // calls to the sandbox. If we miss any system calls, the application will be
   // terminated by the kernel's seccomp code. So, from a security point of
   // view, if this code fails to identify system calls, we are still behaving
   // correctly.
   {
     Maps maps(proc_self_maps_);
     const char *libs[]     = { "ld", "libc", "librt", "libpthread", NULL };

     // Intercept system calls in the VDSO segment (if any). This has to happen
     // before intercepting system calls in any of the other libraries, as
     // the main kernel entry point might be inside of the VDSO and we need to
     // determine its address before we can compare it to jumps from inside
     // other libraries.
     for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){
       Library* library = *iter;
       if (library->isVDSO() && library->parseElf()) {
         library->makeWritable(true);
         library->patchSystemCalls();
         library->makeWritable(false);
         break;
       }
     }

     // Intercept system calls in libraries that are known to have them.
     for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){
       Library* library = *iter;

       // Bind the name to a const reference before taking c_str(). If name()
       // returns a temporary, this keeps it alive for the whole iteration
       // instead of leaving "mapping" dangling after the full expression.
       const std::string& mapping_name = iter.name();
       const char* mapping = mapping_name.c_str();

       // Find the actual base name of the mapped library by skipping past any
       // SPC and forward-slashes. We don't want to accidentally find matches,
       // because the directory name included part of our well-known lib names.
       //
       // Typically, prior to pruning, entries would look something like this:
       // 08:01 2289011 /lib/libc-2.7.so
       for (const char *delim = " /"; *delim; ++delim) {
         const char* skip = strrchr(mapping, *delim);
         if (skip) {
           mapping = skip + 1;
         }
       }

       for (const char **ptr = libs; *ptr; ptr++) {
         const char *name = strstr(mapping, *ptr);
         if (name == mapping) {
           // Only accept the match if the well-known prefix is not followed
           // by another letter (so "libc-2.7.so" matches "libc", but
           // "libcrypt.so" does not).
           char ch = name[strlen(*ptr)];
           if (ch < 'A' || (ch > 'Z' && ch < 'a') || ch > 'z') {
             if (library->parseElf()) {
               library->makeWritable(true);
               library->patchSystemCalls();
               library->makeWritable(false);
               break;
             }
           }
         }
       }
     }
   }

   // Take a snapshot of the current memory mappings. These mappings will be
   // off-limits to all future mmap(), munmap(), mremap(), and mprotect() calls.
   // This also provides a synchronization point that ensures the trusted
   // process has finished initialization.
   snapshotMemoryMappings(processFdPub_, proc_self_maps_);
   (void)NOINTR_SYS(sys.close(proc_self_maps_));
   proc_self_maps_ = -1;

   // Creating the trusted thread enables sandboxing
   g_create_trusted_thread(secureMem);

   // Force direct system calls to jump to our entry point.
   struct {
     // Instantiate another copy of linux_syscall_support.h. This time, we
     // define SYS_SYSCALL_ENTRYPOINT. This gives us access to a
     // get_syscall_entrypoint() function that we can use to install a pointer
     // to our system call entrypoint handler.
     // Any user of linux_syscall_support.h who wants to make sure that the
     // sandbox properly redirects its system calls would define the same
     // macro.
     #undef  SYS_ERRNO
     #define SYS_INLINE             inline
     #define SYS_PREFIX             -1
     #define SYS_SYSCALL_ENTRYPOINT "playground$syscallEntryPoint"
     #undef  SYS_LINUX_SYSCALL_SUPPORT_H
     #include "linux_syscall_support.h"
   } entrypoint;
   *entrypoint.get_syscall_entrypoint() = syscallEntryPointNoFrame;

   // We can no longer check for sandboxing support at this point, but we also
   // know for a fact that it is available (as we just turned it on). So update
   // the status to reflect this information.
   status_ = STATUS_ENABLED;
 }

 } // namespace
	// Copyright (c) 2010 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "sandbox_impl.h"

	#include "library.h"
	#include "syscall_entrypoint.h"
	#include "system_call_table.h"

	namespace playground {

	// Global variables
	// File descriptor for /proc/self/maps; -1 while no descriptor is held. It is
	// set through setProcSelfMaps() or opened on demand by startSandbox().
	int Sandbox::proc_self_maps_ = -1;
	// Cached sandbox state: STATUS_UNKNOWN until supportsSeccompSandbox() has
	// probed the system or startSandbox() has enabled the sandbox.
	enum Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
	// Process id, recorded once by startSandbox().
	int Sandbox::pid_;
	// processFdPub_ and cloneFdPub_ each hold one end of the two socketpairs that
	// startSandbox() creates.
	int Sandbox::processFdPub_;
	int Sandbox::cloneFdPub_
	// This is necessary to locate the symbol from assembly code on
	// x86-64 (with %rip-relative addressing) in order for this to work
	// in relocatable code (a .so or a PIE). On i386 this is not
	// necessary but it does not hurt.
	__attribute__((visibility("internal")));
	// Receives the previous SIGSEGV disposition when setupSignalHandlers()
	// installs the sandbox's own handler.
	Sandbox::SysCalls::kernel_sigaction Sandbox::sa_segv_;
	// NOTE(review): the next two objects are only defined here; how they are
	// populated is not visible in this file.
	Sandbox::ProtectedMap Sandbox::protectedMap_;
	std::vector<SecureMem::Args*> Sandbox::secureMemPool_;
	// Hook that startSandbox() calls to create the trusted thread; defaults to
	// Sandbox::createTrustedThread.
	CreateTrustedThreadFunc g_create_trusted_thread =
	Sandbox::createTrustedThread;

	bool Sandbox::sendFd(int transport, int fd0, int fd1, const void* buf,
	size_t len) {
	int fds[2], count = 0;
	if (fd0 >= 0) { fds[count++] = fd0; }
	if (fd1 >= 0) { fds[count++] = fd1; }
	if (!count) {
	return false;
	}
	char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
	memset(cmsg_buf, 0, sizeof(cmsg_buf));
	struct SysCalls::kernel_iovec iov[2] = { { 0 } };
	struct SysCalls::kernel_msghdr msg = { 0 };
	int dummy = 0;
	iov[0].iov_base = &dummy;
	iov[0].iov_len = sizeof(dummy);
	if (buf && len > 0) {
	iov[1].iov_base = const_cast<void *>(buf);
	iov[1].iov_len = len;
	}
	msg.msg_iov = iov;
	msg.msg_iovlen = (buf && len > 0) ? 2 : 1;
	msg.msg_control = cmsg_buf;
	msg.msg_controllen = CMSG_LEN(count*sizeof(int));
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(count*sizeof(int));
	memcpy(CMSG_DATA(cmsg), fds, count*sizeof(int));
	SysCalls sys;
	return NOINTR_SYS(sys.sendmsg(transport, &msg, 0)) ==
	(ssize_t)(sizeof(dummy) + ((buf && len > 0) ? len : 0));
	}

	bool Sandbox::getFd(int transport, int* fd0, int* fd1, void* buf, size_t*len) {
	int count = 0;
	int *err = NULL;
	if (fd0) {
	count++;
	err = fd0;
	*fd0 = -1;
	}
	if (fd1) {
	if (!count++) {
	err = fd1;
	}
	*fd1 = -1;
	}
	if (!count) {
	return false;
	}
	char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
	memset(cmsg_buf, 0, sizeof(cmsg_buf));
	struct SysCalls::kernel_iovec iov[2] = { { 0 } };
	struct SysCalls::kernel_msghdr msg = { 0 };
	iov[0].iov_base = err;
	iov[0].iov_len = sizeof(int);
	if (buf && len && *len > 0) {
	iov[1].iov_base = buf;
	iov[1].iov_len = *len;
	}
	msg.msg_iov = iov;
	msg.msg_iovlen = (buf && len && *len > 0) ? 2 : 1;
	msg.msg_control = cmsg_buf;
	msg.msg_controllen = CMSG_LEN(count*sizeof(int));
	SysCalls sys;
	ssize_t bytes = NOINTR_SYS(sys.recvmsg(transport, &msg, 0));
	if (len) {
	*len = bytes > (int)sizeof(int) ?
	bytes - sizeof(int) : 0;
	}
	if (bytes != (ssize_t)(sizeof(int) + ((buf && len && len > 0) ? len : 0))){
	*err = bytes >= 0 ? 0 : -EBADF;
	return false;
	}
	if (*err) {
	// "err" is the first four bytes of the payload. If these are non-zero,
	// the sender on the other side of the socketpair sent us an errno value.
	// We don't expect to get any file handles in this case.
	return false;
	}
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
	if ((msg.msg_flags & (MSG_TRUNC\|MSG_CTRUNC)) \|\|
	!cmsg \|\|
	cmsg->cmsg_level != SOL_SOCKET \|\|
	cmsg->cmsg_type != SCM_RIGHTS \|\|
	cmsg->cmsg_len != CMSG_LEN(count*sizeof(int))) {
	*err = -EBADF;
	return false;
	}
	if (fd1) { fd1 = ((int )CMSG_DATA(cmsg))[--count]; }
	if (fd0) { fd0 = ((int )CMSG_DATA(cmsg))[--count]; }
	return true;
	}

	// Forward declaration of the SIGSEGV handler that
	// Sandbox::setupSignalHandlers() installs. The asm label fixes the
	// linker-level symbol name to "playground$segvSignalHandler".
	// NOTE(review): neither the definition nor the INTERNAL macro is visible in
	// this file; INTERNAL presumably restricts symbol visibility -- confirm.
	void segvSignalHandler(int signo, Sandbox::SysCalls::siginfo *context,
	void *unused)
	asm("playground$segvSignalHandler") INTERNAL;

	void Sandbox::setupSignalHandlers() {
	// Set SIGCHLD to SIG_DFL so that waitpid() can work
	SysCalls sys;
	struct SysCalls::kernel_sigaction sa;
	memset(&sa, 0, sizeof(sa));
	sa.sa_handler_ = SIG_DFL;
	sys.sigaction(SIGCHLD, &sa, NULL);

	// Set up SEGV handler for dealing with RDTSC instructions, system calls
	// that have been rewritten to use INT0, for sigprocmask() emulation, for
	// the creation of threads, and for user-provided SEGV handlers.
	sa.sa_sigaction_ = segvSignalHandler;
	sa.sa_flags = SA_SIGINFO \| SA_NODEFER;
	sys.sigaction(SIGSEGV, &sa, &sa_segv_);

	// Unblock SIGSEGV and SIGCHLD
	SysCalls::kernel_sigset_t mask;
	memset(&mask, 0x00, sizeof(mask));
	mask.sig[0] \|= (1 << (SIGSEGV - 1)) \| (1 << (SIGCHLD - 1));
	sys.sigprocmask(SIG_UNBLOCK, &mask, 0);
	}

	// Stamps "request" with the system call number and the current cookie,
	// writes "size" bytes of it to processFdPub(), and then reads a long result
	// back from threadFdPub(). Terminates via die() if either transfer is
	// short. Returns the result that was read.
	long Sandbox::forwardSyscall(int sysnum, struct RequestHeader* request,
	                             int size) {
	  SysCalls sys;
	  request->sysnum = sysnum;
	  request->cookie = cookie();

	  const bool sent = write(sys, processFdPub(), request, size) == size;
	  if (!sent) {
	    die("Failed to send forwarded request");
	  }

	  long result;
	  const bool received =
	      read(sys, threadFdPub(), &result, sizeof(result)) == sizeof(result);
	  if (!received) {
	    die("Failed to receive forwarded result");
	  }
	  return result;
	}

	// Returns the SecureMem::Args pointer that is stored at a fixed negative
	// offset from a segment register (%gs on x86-64, %fs on i386). Compilation
	// fails on any other architecture.
	SecureMem::Args* Sandbox::getSecureMem() {
	// Check trusted_thread.cc for the magic offset that gets us from the TLS
	// to the beginning of the secure memory area.
	SecureMem::Args* ret;
	#if defined(__x86_64__)
	// Load the 64-bit pointer found at %gs:-0xE0 into "ret".
	asm volatile(
	"movq %%gs:-0xE0, %0\n"
	: "=q"(ret));
	#elif defined(__i386__)
	// Load the 32-bit pointer found at %fs:-0x58 into "ret".
	asm volatile(
	"movl %%fs:-0x58, %0\n"
	: "=r"(ret));
	#else
	#error Unsupported target platform
	#endif
	return ret;
	}

	// Rewinds "proc_self_maps", passes that descriptor to the peer on
	// "processFd" via sendFd(), and then waits for a single int in reply.
	// Any failure along the way ends in die(), which is expected not to return.
	void Sandbox::snapshotMemoryMappings(int processFd, int proc_self_maps) {
	  SysCalls sys;
	  const bool handed_over =
	      sys.lseek(proc_self_maps, 0, SEEK_SET) == 0 &&
	      sendFd(processFd, proc_self_maps, -1, NULL, 0);
	  if (handed_over) {
	    // The reply's value is irrelevant; only its arrival matters.
	    int ack;
	    if (read(sys, processFd, &ack, sizeof(ack)) == sizeof(ack)) {
	      return;
	    }
	  }
	  die("Cannot access /proc/self/maps");
	}

	// Probes whether the seccomp sandbox can be started on this system.
	//   proc_fd: if >= 0, a descriptor relative to which "self/maps" is opened
	//            in the probe child; otherwise startSandbox() opens
	//            /proc/self/maps itself.
	// Returns non-zero if the sandbox is supported, 0 otherwise. The answer is
	// cached in status_, so the probe runs at most once.
	//
	// The probe forks a child that starts the sandbox and then writes one byte
	// to a pipe; the parent treats the arrival of that byte as success.
	int Sandbox::supportsSeccompSandbox(int proc_fd) {
	  if (status_ != STATUS_UNKNOWN) {
	    return status_ != STATUS_UNSUPPORTED;
	  }
	  int fds[2];
	  SysCalls sys;
	  if (sys.pipe(fds)) {
	    status_ = STATUS_UNSUPPORTED;
	    return 0;
	  }
	  pid_t pid;
	  switch ((pid = sys.fork())) {
	    case -1:
	      // fork() failed: release both pipe ends so they do not leak.
	      (void)NOINTR_SYS(sys.close(fds[0]));
	      (void)NOINTR_SYS(sys.close(fds[1]));
	      status_ = STATUS_UNSUPPORTED;
	      return 0;
	    case 0: {
	      // Child: redirect stdin/stdout/stderr to /dev/null (best effort).
	      int devnull = sys.open("/dev/null", O_RDWR, 0);
	      if (devnull >= 0) {
	        sys.dup2(devnull, 0);
	        sys.dup2(devnull, 1);
	        sys.dup2(devnull, 2);
	        sys.close(devnull);
	      }
	      if (proc_fd >= 0) {
	        setProcSelfMaps(sys.openat(proc_fd, "self/maps", O_RDONLY, 0));
	      }
	      startSandbox();
	      // Signal the parent that the sandbox came up.
	      write(sys, fds[1], "", 1);

	      // Try to tell the trusted thread to shut down the entire process in an
	      // orderly fashion
	      defaultSystemCallHandler(__NR_exit_group, 0, 0, 0, 0, 0, 0);

	      // If that did not work (e.g. because the kernel does not know about the
	      // exit_group() system call), make a direct _exit() system call instead.
	      // This system call is unrestricted in seccomp mode, so it will always
	      // succeed. Normally, we don't like it, because unlike exit_group() it
	      // does not terminate any other thread. But since we know that
	      // exit_group() exists in all kernels which support kernel-level
	      // threads, this is OK: we only get here for old kernels where _exit()
	      // is sufficient.
	      sys._exit(0);
	    }
	    default:
	      // Parent: close the write end so that read() reports end-of-file if
	      // the child dies without writing its byte.
	      (void)NOINTR_SYS(sys.close(fds[1]));
	      char ch;
	      if (read(sys, fds[0], &ch, 1) != 1) {
	        status_ = STATUS_UNSUPPORTED;
	      } else {
	        status_ = STATUS_AVAILABLE;
	      }
	      int rc;
	      (void)NOINTR_SYS(sys.waitpid(pid, &rc, 0));
	      (void)NOINTR_SYS(sys.close(fds[0]));
	      return status_ != STATUS_UNSUPPORTED;
	  }
	}

	// Records "proc_self_maps" as the descriptor that startSandbox() should use
	// for reading the memory mappings. A negative value makes startSandbox()
	// open "/proc/self/maps" itself.
	void Sandbox::setProcSelfMaps(int proc_self_maps) {
	  Sandbox::proc_self_maps_ = proc_self_maps;
	}

	// Turns on the sandbox for the current process. Does nothing if the sandbox
	// is already enabled and terminates via die() if it is known to be
	// unsupported. Otherwise it, in order: obtains /proc/self/maps, installs
	// signal handlers, initializes the system call table, creates the trusted
	// process, patches system calls in the VDSO and in well-known libraries,
	// snapshots the memory mappings, creates the trusted thread, and installs
	// the system call entry point. On return status_ is STATUS_ENABLED.
	void Sandbox::startSandbox() {
	  if (status_ == STATUS_UNSUPPORTED) {
	    die("The seccomp sandbox is not supported on this computer");
	  } else if (status_ == STATUS_ENABLED) {
	    return;
	  }

	  SysCalls sys;
	  if (proc_self_maps_ < 0) {
	    proc_self_maps_ = sys.open("/proc/self/maps", O_RDONLY, 0);
	    if (proc_self_maps_ < 0) {
	      die("Cannot access \"/proc/self/maps\"");
	    }
	  }

	  // The pid is unchanged for the entire program, so we can retrieve it once
	  // and store it in a global variable.
	  pid_ = sys.getpid();

	  // Reset SIGCHLD to its default action, install the SIGSEGV handler, and
	  // unblock both signals.
	  setupSignalHandlers();

	  // Set up the system call policy
	  SyscallTable::initializeSyscallTable();

	  // Get socketpairs for talking to the trusted process
	  int pair[4];
	  if (sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair) ||
	      sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair+2)) {
	    die("Failed to create trusted thread");
	  }
	  processFdPub_ = pair[0];
	  cloneFdPub_ = pair[2];
	  SecureMemArgs* secureMem = createTrustedProcess(pair[0], pair[1],
	                                                  pair[2], pair[3]);

	  // We find all libraries that have system calls and redirect the system
	  // calls to the sandbox. If we miss any system calls, the application will be
	  // terminated by the kernel's seccomp code. So, from a security point of
	  // view, if this code fails to identify system calls, we are still behaving
	  // correctly.
	  {
	    Maps maps(proc_self_maps_);
	    const char *libs[] = { "ld", "libc", "librt", "libpthread", NULL };

	    // Intercept system calls in the VDSO segment (if any). This has to happen
	    // before intercepting system calls in any of the other libraries, as
	    // the main kernel entry point might be inside of the VDSO and we need to
	    // determine its address before we can compare it to jumps from inside
	    // other libraries.
	    for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){
	      Library* library = *iter;
	      if (library->isVDSO() && library->parseElf()) {
	        library->makeWritable(true);
	        library->patchSystemCalls();
	        library->makeWritable(false);
	        break;
	      }
	    }

	    // Intercept system calls in libraries that are known to have them.
	    for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){
	      Library* library = *iter;

	      // Bind the name to a const reference before taking c_str(). If name()
	      // returns a temporary, this keeps it alive for the whole iteration
	      // instead of leaving "mapping" dangling after the full expression.
	      const std::string& mapping_name = iter.name();
	      const char* mapping = mapping_name.c_str();

	      // Find the actual base name of the mapped library by skipping past any
	      // SPC and forward-slashes. We don't want to accidentally find matches,
	      // because the directory name included part of our well-known lib names.
	      //
	      // Typically, prior to pruning, entries would look something like this:
	      // 08:01 2289011 /lib/libc-2.7.so
	      for (const char *delim = " /"; *delim; ++delim) {
	        const char* skip = strrchr(mapping, *delim);
	        if (skip) {
	          mapping = skip + 1;
	        }
	      }

	      // "libs" is a NULL-terminated array of C strings, so walk it with a
	      // pointer-to-pointer and stop at the NULL entry.
	      for (const char **ptr = libs; *ptr; ptr++) {
	        const char *name = strstr(mapping, *ptr);
	        if (name == mapping) {
	          // Only accept the match if the well-known prefix is not followed
	          // by another letter (so "libc-2.7.so" matches "libc", but
	          // "libcrypt.so" does not).
	          char ch = name[strlen(*ptr)];
	          if (ch < 'A' || (ch > 'Z' && ch < 'a') || ch > 'z') {
	            if (library->parseElf()) {
	              library->makeWritable(true);
	              library->patchSystemCalls();
	              library->makeWritable(false);
	              break;
	            }
	          }
	        }
	      }
	    }
	  }

	  // Take a snapshot of the current memory mappings. These mappings will be
	  // off-limits to all future mmap(), munmap(), mremap(), and mprotect() calls.
	  // This also provides a synchronization point that ensures the trusted
	  // process has finished initialization.
	  snapshotMemoryMappings(processFdPub_, proc_self_maps_);
	  (void)NOINTR_SYS(sys.close(proc_self_maps_));
	  proc_self_maps_ = -1;

	  // Creating the trusted thread enables sandboxing
	  g_create_trusted_thread(secureMem);

	  // Force direct system calls to jump to our entry point.
	  struct {
	    // Instantiate another copy of linux_syscall_support.h. This time, we
	    // define SYS_SYSCALL_ENTRYPOINT. This gives us access to a
	    // get_syscall_entrypoint() function that we can use to install a pointer
	    // to our system call entrypoint handler.
	    // Any user of linux_syscall_support.h who wants to make sure that the
	    // sandbox properly redirects its system calls would define the same
	    // macro.
	    #undef SYS_ERRNO
	    #define SYS_INLINE inline
	    #define SYS_PREFIX -1
	    #define SYS_SYSCALL_ENTRYPOINT "playground$syscallEntryPoint"
	    #undef SYS_LINUX_SYSCALL_SUPPORT_H
	    #include "linux_syscall_support.h"
	  } entrypoint;
	  *entrypoint.get_syscall_entrypoint() = syscallEntryPointNoFrame;

	  // We can no longer check for sandboxing support at this point, but we also
	  // know for a fact that it is available (as we just turned it on). So update
	  // the status to reflect this information.
	  status_ = STATUS_ENABLED;
	}

	} // namespace