// trusted_thread_x86_64.S - external/seccompsandbox - Git at Google

 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include <asm/unistd.h>


 // Jump to fatal_error unless %rax (the return value of the system call that
 // was just made) is zero. Overwrites the flags.
 #define CHECK_SYSCALL_ZERO  test %rax, %rax; jnz fatal_error


         // Entry point.
         //   In:  %rdi = args; kept in %rbp and read later (from label 28 on)
         //        at offset 0x08 (sequence number), 0xC8 (new secure memory)
         //        and 0x1054 (signal mask).
         //   %rbx and %rbp are pushed here and popped again at label 999,
         //   whose address is stored into the signal frame as return address.
         .internal playground$runTrustedThread
         .global playground$runTrustedThread
 playground$runTrustedThread:
         push %rbx
         push %rbp
         mov  %rdi, %rbp          // %rbp = args
         xor  %rbx, %rbx          // initial sequence number
         lea  999f(%rip), %r15    // continue in same thread

         // Signal handlers are process-wide. This means that for security
         // reasons, we cannot allow that the trusted thread ever executes any
         // signal handlers.
         // We prevent the execution of signal handlers by setting a signal
         // mask that blocks all signals. In addition, we make sure that the
         // stack pointer is invalid.
         // We cannot reset the signal mask until after we have enabled
         // Seccomp mode. Our sigprocmask() wrapper would normally do this by
         // raising a signal, modifying the signal mask in the kernel-generated
         // signal frame, and then calling sigreturn(). This presents a bit of
         // a Catch-22, as all signals are masked and we can therefore not
         // raise any signal that would allow us to generate the signal stack
         // frame.
         // Instead, we have to create the signal stack frame prior to entering
         // Seccomp mode. This incidentally also helps us to restore the
         // signal mask to the same value that it had prior to entering the
         // sandbox.
         // The signal wrapper for clone() is the second entry point into this
         // code (by means of sending an IPC to its trusted thread). It goes
         // through the same steps of creating a signal stack frame on the
         // newly created thread's stacks prior to cloning. See clone.cc for
         // details.
         // NOTE(review): the request code placed in %eax and the frame offsets
         // 0xA0/0xA8 used below are defined by the handler described in
         // clone.cc, which is not visible here - confirm against that file.
         mov  $__NR_clone + 0xF000, %eax
         mov  %rsp, %rcx          // %rcx = stack pointer before the frame is pushed
         int  $0                  // push a signal stack frame (see clone.cc)
         mov  %rcx, 0xA0(%rsp)    // pop stack upon call to sigreturn()
         mov  %r15, 0xA8(%rsp)    // return address: continue in same thread
         mov  %rsp, %r9           // %r9 = current stack pointer; label 20 uses it as child_stack
         mov  $2, %rdi            // how     = SIG_SETMASK
         pushq $-1                // eight bytes of all-ones on the stack
         mov  %rsp, %rsi          // set     = full mask
         xor  %rdx, %rdx          // old_set = NULL
         mov  $8, %r10            // mask all 64 signals
         mov  $__NR_rt_sigprocmask, %eax
         syscall
         CHECK_SYSCALL_ZERO
         xor  %rsp, %rsp          // invalidate the stack in all trusted code
         jmp  20f                 // create trusted thread

         // TODO(markus): Coalesce the read() operations by reading into a
         //               bigger buffer.

         // Parameters:
         //   *%fs: secure memory region
         //         the page following this one contains the scratch space
         //   %r13: thread's side of threadFd

         // Local variables:
         //   %rbx: sequence number for trusted calls

         // Temporary variables:
         //   %r8: child stack
         //   %r9: system call number, child stack
         //  %rbp: secure memory of previous thread

         // Layout of secure shared memory region (c.f. securemem.h):
         //   0x00:  pointer to the secure shared memory region (i.e. self)
         //   0x08:  sequence number; must match %rbx
         //   0x10:  call type; must match %eax, iff %eax == -1 || %eax == -2
         //   0x18:  system call number; passed to syscall in %rax
         //   0x20:  first argument; passed to syscall in %rdi
         //   0x28:  second argument; passed to syscall in %rsi
         //   0x30:  third argument; passed to syscall in %rdx
         //   0x38:  fourth argument; passed to syscall in %r10
         //   0x40:  fifth argument; passed to syscall in %r8
         //   0x48:  sixth argument; passed to syscall in %r9
         //   0x50-0xC0: no longer used
         //   0xC8:  new shared memory for clone()
         //   0xD0:  no longer used
         //   0xD4:  no longer used
         //   0xD8:  set to non-zero, if in debugging mode
         //   0xDC:  most recent SHM id returned by shmget(IPC_PRIVATE)
         //   0xE0:  cookie assigned to us by the trusted process (TLS_COOKIE)
         //   0xE8:  thread id (TLS_TID)
         //   0xF0:  threadFdPub (TLS_THREAD_FD)
         //   0xF8:  syscallMutex
         //   0xFC:  maxSyscall
         //   0x100: syscallTable
         //   0x200-0x1000: securely passed verified file name(s)

         // Layout of (untrusted) scratch space:
         //   0x00:  syscall number; passed in %rax
         //   0x04:  first argument; passed in %rdi
         //   0x0C:  second argument; passed in %rsi
         //   0x14:  third argument; passed in %rdx
         //   0x1C:  fourth argument; passed in %r10
         //   0x24:  fifth argument; passed in %r8
         //   0x2C:  sixth argument; passed in %r9
         //   0x34:  return value
         //   0x3C:  RDTSCP result (%eax)
         //   0x40:  RDTSCP result (%edx)
         //   0x44:  RDTSCP result (%ecx)
         //   0x48:  last system call (not used on x86-64)
         //   0x4C:  number of consecutive calls to a time fnc; unused on x86-64
         //   0x50:  nesting level of system calls (for debugging purposes only)
         //   0x54:  signal mask
         //   0x5C:  in SEGV handler

         // We use the %fs register for accessing the secure read-only page, and
         // the untrusted scratch space immediately following it. The segment
         // register and the local descriptor table is set up by passing
         // appropriate arguments to clone().

         // Label 0 is where a freshly cloned trusted thread starts executing
         // (target of the "jz 0b" that follows the clone() call after label
         // 28). %rsp is zeroed so that trusted code never has a usable stack.
       0:xor  %rsp, %rsp
         mov  $2, %ebx            // %rbx  = initial sequence number

         // Read request from untrusted thread, or from trusted process. In
         // either case, the data that we read has to be considered untrusted.
         // read(threadFd, &scratch, 4)
         // Label 1 is the top of the request loop; the reply path jumps back
         // here after a complete write.
       1:mov  %r13, %rdi          // fd  = threadFd
         mov  %fs:0x0, %rsi       // secure_mem
         add  $0x1000, %rsi       // buf = &scratch
         mov  $4, %edx            // len = 4
         // The system call number is reloaded on every attempt. After an
         // EINTR return %rax holds -4, so retrying a bare "syscall" would not
         // repeat the read(). %rdi, %rsi and %rdx survive the system call.
       2:xor  %rax, %rax          // NR_read
         syscall
         cmp  $-4, %rax           // EINTR
         jz   2b
         cmp  %rdx, %rax
         jnz  fatal_error         // anything but a complete 4-byte read is fatal

         // Retrieve system call number. It is crucial that we only dereference
         // %fs:0x1000 exactly once. Afterwards, memory becomes untrusted and
         // we must use the value that we have read the first time.
         mov  0(%rsi), %eax

         // If syscall number is -1, execute an unlocked system call from the
         // secure memory area
         cmp  $-1, %eax
         jnz  5f                  // label 5 reuses the flags of this compare
       3:cmp  %rbx, %fs:0x8       // sequence number in secure memory must match
         jne  fatal_error
         cmp  %fs:0x10, %eax      // call type in secure memory must equal the request
         jne  fatal_error
         mov  %fs:0x18, %eax      // system call number
         mov  %fs:0x20, %rdi      // first argument
         mov  %fs:0x28, %rsi      // second argument
         mov  %fs:0x30, %rdx      // third argument
         mov  %fs:0x38, %r10      // fourth argument
         mov  %fs:0x40, %r8       // fifth argument
         mov  %fs:0x48, %r9       // sixth argument
         cmp  %rbx, %fs:0x8       // sequence number is checked again after the loads
         jne  fatal_error
         add  $2, %rbx            // advance the expected sequence number

         // clone() has unusual calling conventions and must be handled
         // specially
         cmp  $__NR_clone, %rax
         jz   19f

         // shmget() gets some special treatment. Whenever we return from this
         // system call, we remember the most recently returned SysV shm id.
         cmp  $__NR_shmget, %eax
         jnz  4f
         syscall                  // shmget()
         mov  %rax, %r8           // %r8 = shmget() result, returned at label 8
         mov  $__NR_clone, %eax   // clone(SIGCHLD, stack=1): start a helper
         mov  $17, %edi           // flags = SIGCHLD
         mov  $1, %esi            // stack = 1
         syscall
         test %rax, %rax
         js   fatal_error
         mov  %rax, %rdi          // %rdi = helper pid; mov keeps the flags from test
         jnz  8f                  // wait for child, then return result
         // From here on we are in the helper (clone() returned zero). It
         // makes the first secure page writable, records the id and exits.
         // NOTE(review): presumably the page is shared with the parent so
         // that the store below is visible there - confirm.
         mov  %fs:0x0, %rdi       // start = secure_mem
         mov  $4096, %esi         // len   = 4096
         mov  $3, %edx            // prot  = PROT_READ | PROT_WRITE
         mov  $__NR_mprotect, %eax
         syscall
         CHECK_SYSCALL_ZERO
         mov  %r8d, 0xDC(%rdi)    // set most recently returned SysV shm id
         xor  %rdi, %rdi          // exit status 0 for label 27

         // When debugging messages are enabled, warn about expensive system
         // calls
         #ifndef NDEBUG
         cmpw $0, %fs:0xD8        // debug mode
         jz   27f
         mov  $__NR_write, %eax
         mov  $2, %edi            // fd = stderr
         lea  101f(%rip), %rsi    // "This is an expensive system call"
         mov  $102f-101f, %edx    // len = strlen(msg)
         syscall
         xor  %rdi, %rdi          // write() used %edi for the fd; restore status 0
         #endif

         jmp  27f                 // exit program, no message
       4:syscall                  // any other unlocked system call
         jmp  15f                 // return result

         // If syscall number is -2, execute locked system call from the
         // secure memory area
         // The flags tested at label 5 still come from "cmp $-1, %eax": request
         // numbers greater than -1 (signed) go to the table check at label 12.
       5:jg   12f
         cmp  $-2, %eax
         jnz  9f
         cmp  %rbx, %fs:0x8       // sequence number in secure memory must match
         jne  fatal_error
         cmp  %eax, %fs:0x10      // call type in secure memory must equal the request
         jne  fatal_error

         // When debugging messages are enabled, warn about expensive system
         // calls
         #ifndef NDEBUG
         cmpw $0, %fs:0xD8        // debug mode
         jz   6f
         mov  $__NR_write, %eax
         mov  $2, %edi            // fd = stderr
         lea  101f(%rip), %rsi    // "This is an expensive system call"
         mov  $102f-101f, %edx    // len = strlen(msg)
         syscall
       6:
         #endif

         mov  %fs:0x18, %eax      // system call number
         mov  %fs:0x20, %rdi      // first argument
         mov  %fs:0x28, %rsi      // second argument
         mov  %fs:0x30, %rdx      // third argument
         mov  %fs:0x38, %r10      // fourth argument
         mov  %fs:0x40, %r8       // fifth argument
         mov  %fs:0x48, %r9       // sixth argument
         cmp  %rbx, %fs:0x8       // sequence number is checked again after the loads
         jne  fatal_error

         // exit() terminates trusted thread
         cmp  $__NR_exit, %eax
         jz   18f

         // Perform requested system call
         syscall

         // Unlock mutex
         // A helper created with clone(SIGCHLD) performs the unlock at label
         // 22; this thread waits for it and then returns the saved result.
       7:cmp  %rbx, %fs:0x8
         jne  fatal_error
         mov  %fs:0, %r12         // %r12 = secure_mem, expected by label 22
         add  $2, %rbx            // advance the expected sequence number
         mov  %rax, %r8           // %r8 = system call result, returned below
         mov  $__NR_clone, %eax
         mov  $17, %rdi           // flags = SIGCHLD
         mov  $1, %rsi            // stack = 1
         syscall
         test %rax, %rax
         js   fatal_error
         jz   22f                 // unlock and exit
         mov  %rax, %rdi          // %rdi = helper pid
         // wait4(pid, NULL, 0, NULL); every attempt reloads the arguments and
         // the system call number.
       8:xor  %rsi, %rsi
         xor  %rdx, %rdx
         xor  %r10, %r10
         mov  $__NR_wait4, %eax
         syscall
         cmp  $-4, %eax           // EINTR
         jz   8b
         mov  %r8, %rax           // result of the original system call
         jmp  15f                 // return result

         // If syscall number is -3, read the time stamp counter
         // Request -4 uses RDTSCP instead. Either way three 32-bit values are
         // stored at scratch offsets 0x3C/0x40/0x44 and sent back as 12 bytes.
       9:cmp  $-3, %eax
         jnz  10f
         rdtsc                    // sets %edx:%eax
         xor  %rcx, %rcx          // RDTSC provides no %ecx value; report zero
         jmp  11f
       10:cmp  $-4, %eax
         jnz  12f                 // any other value goes to the table check
         rdtscp                   // sets %edx:%eax and %ecx
      11:add  $0x3C, %rsi         // %rsi = &scratch + 0x3C
         mov  %eax, 0(%rsi)
         mov  %edx, 4(%rsi)
         mov  %ecx, 8(%rsi)
         mov  $12, %edx           // len = 12
         jmp  16f                 // return result

         // Check in syscallTable whether this system call is unrestricted
         // On entry %eax holds the request number read from the scratch page
         // and %rsi still points at the scratch space.
      12:mov  %rax, %r9           // %r9 = system call number, kept across the read()
         #ifndef NDEBUG
         cmpw $0, %fs:0xD8        // debug mode
         jnz  13f                 // debug mode skips the table check
         #endif
         cmp  %fs:0xFC, %eax      // maxSyscall
         ja   fatal_error         // unsigned compare, so negative numbers fail too
         shl  $4, %rax            // table entries are 16 bytes apart
         mov  %fs:0x100, %rdi     // syscallTable
         add  %rdi, %rax
         mov  0(%rax), %rax       // first quadword of the table entry
         cmp  $1, %rax            // only the value 1 permits direct execution
         jne  fatal_error

         // Default behavior for unrestricted system calls is to just execute
         // them. Read the remaining arguments first.
      13:mov  %rsi, %r8           // %r8 = &scratch
         mov  %r13, %rdi          // fd  = threadFd
         add  $4, %rsi            // buf = &scratch + 4
         mov  $48, %edx           // len = 6*sizeof(void *)
         // The system call number is reloaded on every attempt. After an
         // EINTR return %rax holds -4, so retrying a bare "syscall" would not
         // repeat the read(). %rdi, %rsi and %rdx survive the system call.
      14:xor  %rax, %rax          // NR_read
         syscall
         cmp  $-4, %rax           // EINTR
         jz   14b
         cmp  %rdx, %rax
         jnz  fatal_error         // anything but a complete 48-byte read is fatal
         mov  %r9, %rax           // system call number saved at label 12
         mov  0x04(%r8), %rdi     // first argument
         mov  0x0C(%r8), %rsi     // second argument
         mov  0x14(%r8), %rdx     // third argument
         mov  0x1C(%r8), %r10     // fourth argument
         mov  0x2C(%r8), %r9      // sixth argument
         mov  0x24(%r8), %r8      // fifth argument; loaded last because %r8 is the base
         cmp  $__NR_exit_group, %rax
         jz   27f                 // exit program, no message
         syscall

         // Return result of system call to sandboxed thread
         // Label 15: %rax = result; it is stored at scratch offset 0x34 and
         //           sent back as 8 bytes.
         // Label 16: %rsi and %rdx already describe the reply buffer (used by
         //           the RDTSC/RDTSCP path).
      15:mov  %fs:0x0, %rsi       // secure_mem
         add  $0x1034, %rsi       // buf   = &scratch + 52
         mov  %rax, (%rsi)
         mov  $8, %edx            // len   = 8
      16:mov  %r13, %rdi          // fd    = threadFd
         // The system call number is reloaded on every attempt. After an
         // EINTR return %rax holds -4, so retrying a bare "syscall" would not
         // repeat the write(). %rdi, %rsi and %rdx survive the system call.
      17:mov  $__NR_write, %eax
         syscall
         cmp  %rdx, %rax
         jz   1b                  // complete write: wait for the next request
         cmp  $-4, %rax           // EINTR
         jz   17b
         jmp  fatal_error         // short write or any other error

         // NR_exit:
         // Exit trusted thread after cleaning up resources
         // Closes both descriptors, makes the secure page and the scratch page
         // inaccessible, then starts a helper that releases the mutex (label
         // 22) while this thread reaps the helper and exits (label 21).
      18:mov  %fs:0x0, %r12       // secure_mem
         mov  0xF0(%r12), %rdi    // fd     = threadFdPub
         mov  $__NR_close, %eax
         syscall
         CHECK_SYSCALL_ZERO
         mov  %r12, %rdi          // start  = secure_mem
         mov  $8192, %esi         // length = 8192
         xor  %rdx, %rdx          // prot   = PROT_NONE
         mov  $__NR_mprotect, %eax
         syscall
         CHECK_SYSCALL_ZERO
         mov  %r13, %rdi          // fd     = threadFd
         mov  $__NR_close, %eax
         syscall
         CHECK_SYSCALL_ZERO
         mov  $__NR_clone, %eax
         mov  $17, %rdi           // flags = SIGCHLD
         mov  $1, %rsi            // stack = 1
         syscall
         mov  %rax, %rdi          // helper pid, or the error value used as exit status
         test %rax, %rax
         js   27f                 // exit process
         jne  21f                 // reap helper, exit thread
         jmp  22f                 // unlock mutex

         // NR_clone:
         // Original trusted thread calls clone() to create new nascent
         // thread. This thread is (typically) fully privileged and shares all
         // resources with the caller (i.e. the previous trusted thread),
         // and by extension it shares all resources with the sandbox'd
         // threads.
         // Reached from label 3 with %eax = __NR_clone and the argument
         // registers loaded from secure memory. The requested child stack
         // (%rsi) is replaced by the dummy value 1 for the actual clone().
      19:mov  %fs:0x0, %rbp       // %rbp  = old_shared_mem
         mov  %rsi, %r15          // remember child stack
         mov  $1, %rsi            // stack = 1
         syscall                  // calls NR_clone
         cmp  $-4095, %rax        // return codes -1..-4095 are errno values
         jae  7b                  // unlock mutex, return result
         test %rax, %rax
         jne  15b                 // return result

         // In nascent thread, now.
         // Undo sequence number increase that was made for the general case.
         sub  $2, %rbx

         // We want to maintain an invalid %rsp whenever we access untrusted
         // memory. This ensures that even if an attacker can trick us into
         // triggering a SIGSEGV, we will never successfully execute a signal
         // handler.
         // Signal handlers are inherently dangerous, as an attacker could trick
         // us into returning to the wrong address by adjusting the signal stack
         // right before the handler returns.
         // N.B. While POSIX is curiously silent about this, it appears that on
         // Linux, alternate signal stacks are a per-thread property. That is
         // good. It means that this security mechanism works, even if the
         // sandboxed thread manages to set up an alternate signal stack.
         //
         // TODO(markus): We currently do not support emulating calls to
         // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc
         // for a discussion on how to fix this, if this ever becomes necessary
         mov  %r15, %r9           // %r9 = child_stack
         xor  %r15, %r15          // Request to return from clone() when done

         // Get thread id of nascent thread
         // Label 20 is also reached directly from the function entry, with
         // %r9 = the stack pointer saved there and %r15 = address of label 999.
      20:mov  $__NR_gettid, %eax
         syscall
         mov  %rax, %r14          // %r14 = thread id, sent to the trusted process later

         // Nascent thread creates socketpair() for sending requests to
         // trusted thread.
         // We can create the filehandles on the child's stack. Filehandles are
         // always treated as untrusted.
         // socketpair(AF_UNIX, SOCK_STREAM, 0, fds)
         sub  $0x10, %r9          // reserve 16 bytes: fds at 0 and 4, %r15 at 8
         mov  %r15, 8(%r9)        // preserve return address on child stack
         mov  $__NR_socketpair, %eax
         mov  $1, %edi            // domain = AF_UNIX
         mov  $1, %esi            // type = SOCK_STREAM
         xor  %rdx, %rdx          // protocol = 0
         mov  %r9, %r10           // sv = child_stack
         syscall
         test %rax, %rax
         jz   28f                 // success: set up the new secure memory

         // If things went wrong, we don't have an (easy) way of signaling
         // the parent. For our purposes, it is sufficient to fail with a
         // fatal error.
         jmp  fatal_error
         // Label 21: wait4(%rdi = helper pid, NULL, 0, NULL), then exit the
         // thread. Every attempt reloads the arguments and the call number.
      21:xor  %rsi, %rsi
         xor  %rdx, %rdx
         xor  %r10, %r10
         mov  $__NR_wait4, %eax
         syscall
         cmp  $-4, %eax           // EINTR
         jz   21b
         jmp  23f                 // exit thread (no message)
         // Unlock syscallMutex and exit.
         // On entry %r12 = secureMem.  We cannot use %fs:0 in the case where
         // the page has been mprotect()'d to PROT_NONE.
      22:mov  %r12, %rdi
         mov  $4096, %esi
         mov  $3, %edx            // prot = PROT_READ | PROT_WRITE
         mov  $__NR_mprotect, %eax
         syscall
         CHECK_SYSCALL_ZERO
         add  $0xF8, %rdi         // %rdi = &syscallMutex
         // NOTE(review): a zero result skips the wake-up. Presumably the top
         // bit marks the mutex as held and the remaining bits count waiters -
         // confirm against the mutex implementation.
         lock; addl $0x80000000, (%rdi)
         jz   23f                 // exit thread
         mov  $1, %edx            // val = 1
         mov  %rdx, %rsi          // FUTEX_WAKE
         mov  $__NR_futex, %eax
         syscall
      23:mov  $__NR_exit, %eax
         mov  $1, %edi            // status = 1
         // Label 24 is shared with label 27, which arrives here with
         // %eax = __NR_exit_group. If the call ever returns, execution falls
         // through into fatal_error.
      24:syscall
 fatal_error:
         mov  $__NR_write, %eax
         mov  $2, %edi            // fd = stderr
         lea  100f(%rip), %rsi    // "Sandbox violation detected"
         mov  $101f-100f, %edx    // len = strlen(msg)
         syscall
         // Label 26: exit_group(1). Label 27: exit_group(%edi) with the status
         // chosen by the caller.
      26:mov  $1, %edi
      27:mov  $__NR_exit_group, %eax
         jmp  24b

         // The first page is mapped read-only for use as securely shared memory
         // %rbp addresses the previous secure memory (or the args passed to
         // the entry point); its sequence number at offset 8 is compared with
         // %rbx before each step that depends on it.
      28:mov  0xC8(%rbp), %r12    // %r12 = secure shared memory
         cmp  %rbx, 8(%rbp)
         jne  fatal_error
         mov  $__NR_mprotect, %eax
         mov  %r12, %rdi          // addr = secure_mem
         mov  $4096, %esi         // len  = 4096
         mov  $1, %edx            // prot = PROT_READ
         syscall
         CHECK_SYSCALL_ZERO

         // The second page is used as scratch space by the trusted thread.
         // Make it writable.
         mov  $__NR_mprotect, %eax
         add  $4096, %rdi         // addr = secure_mem + 4096
         mov  $3, %edx            // prot = PROT_READ | PROT_WRITE
         syscall                  // %esi still holds len = 4096
         CHECK_SYSCALL_ZERO

         // Call clone() to create new trusted thread().
         // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
         //       CLONE_SYSVSEM|CLONE_UNTRACED|CLONE_SETTLS, stack, NULL, NULL,
         //       tls)
         mov  4(%r9), %r13d       // %r13  = threadFd (on child's stack)
         mov  $__NR_clone, %eax
         mov  $0x8D0F00, %edi     // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR|TLS
         mov  $1, %rsi            // stack = 1
         mov  %r12, %r8           // tls   = new_secure_mem
         cmp  %rbx, 8(%rbp)       // sequence number must still match
         jne  fatal_error
         syscall
         test %rax, %rax
         js   fatal_error
         jz   0b                  // invoke trustedThreadFnc()

         // Copy the caller's signal mask
         // (offset 0x54 of the scratch page, which follows the secure page)
         mov  0x1054(%rbp), %rax
         mov  %rax, 0x1054(%r12)

         // Done creating trusted thread. We can get ready to return to caller
         mov  %r9, %r8            // %r8 = child_stack
         mov  0(%r9), %r9d        // %r9 = threadFdPub

         // Set up thread local storage with information on how to talk to
         // trusted thread and trusted process.
         lea  0xE0(%r12), %rsi    // args   = &secure_mem.TLS;
         mov  $__NR_arch_prctl, %eax
         mov  $0x1001, %edi       // option = ARCH_SET_GS
         syscall
         cmp  $-4095, %rax        // return codes -1..-4095 are errno values
         jae  fatal_error

         add  $0x10, %r8          // drop the 16 bytes reserved at label 20

         // Check the sequence number
         cmp  %rbx, 8(%rbp)
         jne  fatal_error

         // Nascent thread launches a helper that doesn't share any of our
         // resources, except for pages mapped as MAP_SHARED.
         // clone(SIGCHLD, stack=1)
         mov  $__NR_clone, %eax
         mov  $17, %rdi           // flags = SIGCHLD
         mov  $1, %rsi            // stack = 1
         syscall
         test %rax, %rax
         js   fatal_error
         jne  31f                 // parent: reap the helper

         // Use sendmsg() to send to the trusted process the file handles for
         // communicating with the new trusted thread. We also send the address
         // of the secure memory area (for sanity checks) and the thread id.
         // transport = Sandbox::cloneFdPub()
         // This code runs in the helper only (clone() returned zero).
         mov  playground$cloneFdPub(%rip), %edi
         cmp  %rbx, 8(%rbp)
         jne  fatal_error

         // 0x00 msg:
         //   0x00 msg_name       ($0)
         //   0x08 msg_namelen    ($0)
         //   0x10 msg_iov        (%r8 + 0x44)
         //   0x18 msg_iovlen     ($1)
         //   0x20 msg_control    (%r8 + 0x54)
         //   0x28 msg_controllen ($0x18)
         // 0x30 data:
         //   0x30 msg_flags/err  ($0)
         //   0x34 secure_mem     (%r12)
         //   0x3C threadId       (%r14d)
         //   0x40 threadFdPub    (%r9d)
         // 0x44 iov:
         //   0x44 iov_base       (%r8 + 0x30)
         //   0x4C iov_len        ($0x14)
         // 0x54 cmsg:
         //   0x54 cmsg_len       ($0x18)
         //   0x5C cmsg_level     ($1, SOL_SOCKET)
         //   0x60 cmsg_type      ($1, SCM_RIGHTS)
         //   0x64 threadFdPub    (%r9d)
         //   0x68 threadFd       (%r13d)
         // 0x6C
         lea  sendmsg_data(%rip), %r8 // %r8 = message buffer (no longer the child stack)
         xor  %rdx, %rdx          // flags = 0
         mov  %rdx, 0x00(%r8)     // msg_name
         mov  %edx, 0x08(%r8)     // msg_namelen
         mov  %edx, 0x30(%r8)     // msg_flags
         mov  $1, %r11d           // %r11 = 1, reused for three fields
         mov  %r11, 0x18(%r8)     // msg_iovlen
         mov  %r11d, 0x5C(%r8)    // cmsg_level
         mov  %r11d, 0x60(%r8)    // cmsg_type
         lea  0x30(%r8), %r11     // %r11 = &data
         mov  %r11, 0x44(%r8)     // iov_base
         add  $0x14, %r11         // %r11 = &iov  (offset 0x44)
         mov  %r11, 0x10(%r8)     // msg_iov
         add  $0x10, %r11         // %r11 = &cmsg (offset 0x54)
         mov  %r11, 0x20(%r8)     // msg_control
         mov  $0x14, %r11d        // size of the data area, 0x30 up to 0x44
         mov  %r11, 0x4C(%r8)     // iov_len
         add  $4, %r11d           // 0x18 = size of the cmsg area, 0x54 up to 0x6C
         mov  %r11, 0x28(%r8)     // msg_controllen
         mov  %r11, 0x54(%r8)     // cmsg_len
         mov  %r12, 0x34(%r8)     // secure_mem
         mov  %r14d, 0x3C(%r8)    // threadId
         mov  %r9d, 0x40(%r8)     // threadFdPub
         mov  %r9d, 0x64(%r8)     // threadFdPub
         mov  %r13d, 0x68(%r8)    // threadFd
         mov  $__NR_sendmsg, %eax
         mov  %r8, %rsi           // msg
         syscall
         // NOTE(review): the sendmsg() return value is not examined; the
         // helper exits with status 0 in either case.
      30:xor  %rdi, %rdi
         jmp  27b                 // exit process (no error message)

         // Reap helper
         // wait4(pid, &status, 0, NULL); the status word is written to the
         // four bytes just below %r8 (the child stack) and must be zero.
      31:mov  %rax, %rdi          // %rdi = helper pid
      32:lea  -4(%r8), %rsi
         xor  %rdx, %rdx
         xor  %r10, %r10
         mov  $__NR_wait4, %eax
         syscall
         cmp  $-4, %eax           // EINTR
         jz   32b
         mov  -4(%r8), %eax       // wait status reported for the helper
         test %rax, %rax
         jnz  26b                 // exit process (no error message)

         // Release privileges by entering seccomp mode.
         mov  $__NR_prctl, %eax
         mov  $22, %edi           // PR_SET_SECCOMP
         mov  $1, %esi            // presumably SECCOMP_MODE_STRICT - confirm
         syscall
         CHECK_SYSCALL_ZERO

         // We can finally start using the stack. Signal handlers no longer pose
         // a threat to us.
         mov  %r8, %rsp

         // Back in the newly created sandboxed thread, wait for trusted process
         // to receive request. It is possible for an attacker to make us
         // continue even before the trusted process is done. This is OK. It'll
         // result in us putting stale values into the new thread's TLS. But
         // that data is considered untrusted anyway.
         push %rax                // reserve eight bytes on the stack as read buffer
         mov  $1, %edx            // len       = 1
         mov  %rsp, %rsi          // buf       = %rsp
         mov  %r9, %rdi           // fd        = threadFdPub
      33:xor  %rax, %rax          // NR_read
         syscall
         cmp  $-4, %rax           // EINTR
         jz   33b
         cmp  %rdx, %rax
         jne  fatal_error
         pop  %rax                // release the buffer; %rsp equals %r8 again

         // Returning to the place where clone() had been called. We rely on
         // using rt_sigreturn() for restoring our registers. The caller already
         // created a signal stack frame and patched the register values
         // with the ones that were in effect prior to calling sandbox_clone().
         mov  $__NR_rt_sigreturn, %eax
         syscall

         // Message strings written to stderr. Their lengths are computed from
         // the label differences 101-100 and 102-101, so the labels must stay
         // directly adjacent to the strings.
         .pushsection ".rodata"
     100:.ascii "Sandbox violation detected, program aborted\n"
     101:.ascii "WARNING! This is an expensive system call\n"
     102:
         .popsection

         // Function epilogue. Its address is loaded into %r15 at the entry
         // point and stored into the signal frame as the return address. It
         // restores the two registers pushed on entry.
     999:pop  %rbp
         pop  %rbx
         ret


         .bss
         // Reserve space for sendmsg() data.  This is used in a fork()'d
         // helper process, so in principle this could safely overlap and
         // overwrite other data, but it is such a small amount of memory
         // that it is not worth trying to do that.  The only requirement
         // is that this must be in a MAP_PRIVATE mapping so that an
         // untrusted thread cannot modify the forked subprocess's copy.
         // The size 0x6C is the end offset of the msg/data/iov/cmsg layout
         // that the sendmsg() code fills in.
 sendmsg_data:
         .space 0x6C
	// Copyright (c) 2010 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include <asm/unistd.h>


	#define CHECK_SYSCALL_ZERO test %rax, %rax; jnz fatal_error


	.internal playground$runTrustedThread
	.global playground$runTrustedThread
	playground$runTrustedThread:
	push %rbx
	push %rbp
	mov %rdi, %rbp // %rbp = args
	xor %rbx, %rbx // initial sequence number
	lea 999f(%rip), %r15 // continue in same thread

	// Signal handlers are process-wide. This means that for security
	// reasons, we cannot allow that the trusted thread ever executes any
	// signal handlers.
	// We prevent the execution of signal handlers by setting a signal
	// mask that blocks all signals. In addition, we make sure that the
	// stack pointer is invalid.
	// We cannot reset the signal mask until after we have enabled
	// Seccomp mode. Our sigprocmask() wrapper would normally do this by
	// raising a signal, modifying the signal mask in the kernel-generated
	// signal frame, and then calling sigreturn(). This presents a bit of
	// a Catch-22, as all signals are masked and we can therefore not
	// raise any signal that would allow us to generate the signal stack
	// frame.
	// Instead, we have to create the signal stack frame prior to entering
	// Seccomp mode. This incidentally also helps us to restore the
	// signal mask to the same value that it had prior to entering the
	// sandbox.
	// The signal wrapper for clone() is the second entry point into this
	// code (by means of sending an IPC to its trusted thread). It goes
	// through the same steps of creating a signal stack frame on the
	// newly created thread's stacks prior to cloning. See clone.cc for
	// details.
	mov $__NR_clone + 0xF000, %eax
	mov %rsp, %rcx
	int $0 // push a signal stack frame (see clone.cc)
	mov %rcx, 0xA0(%rsp) // pop stack upon call to sigreturn()
	mov %r15, 0xA8(%rsp) // return address: continue in same thread
	mov %rsp, %r9
	mov $2, %rdi // how = SIG_SETMASK
	pushq $-1
	mov %rsp, %rsi // set = full mask
	xor %rdx, %rdx // old_set = NULL
	mov $8, %r10 // mask all 64 signals
	mov $__NR_rt_sigprocmask, %eax
	syscall
	CHECK_SYSCALL_ZERO
	xor %rsp, %rsp // invalidate the stack in all trusted code
	jmp 20f // create trusted thread

	// TODO(markus): Coalesce the read() operations by reading into a
	// bigger buffer.

	// Parameters:
	// *%fs: secure memory region
	// the page following this one contains the scratch space
	// %r13: thread's side of threadFd

	// Local variables:
	// %rbx: sequence number for trusted calls

	// Temporary variables:
	// %r8: child stack
	// %r9: system call number, child stack
	// %rbp: secure memory of previous thread

	// Layout of secure shared memory region (c.f. securemem.h):
	// 0x00: pointer to the secure shared memory region (i.e. self)
	// 0x08: sequence number; must match %rbx
	// 0x10: call type; must match %eax, iff %eax == -1 \|\| %eax == -2
	// 0x18: system call number; passed to syscall in %rax
	// 0x20: first argument; passed to syscall in %rdi
	// 0x28: second argument; passed to syscall in %rsi
	// 0x30: third argument; passed to syscall in %rdx
	// 0x38: fourth argument; passed to syscall in %r10
	// 0x40: fifth argument; passed to syscall in %r8
	// 0x48: sixth argument; passed to syscall in %r9
	// 0x50-0xC0: no longer used
	// 0xC8: new shared memory for clone()
	// 0xD0: no longer used
	// 0xD4: no longer used
	// 0xD8: set to non-zero, if in debugging mode
	// 0xDC: most recent SHM id returned by shmget(IPC_PRIVATE)
	// 0xE0: cookie assigned to us by the trusted process (TLS_COOKIE)
	// 0xE8: thread id (TLS_TID)
	// 0xF0: threadFdPub (TLS_THREAD_FD)
	// 0xF8: syscallMutex
	// 0xFC: maxSyscall
	// 0x100: syscallTable
	// 0x200-0x1000: securely passed verified file name(s)

	// Layout of (untrusted) scratch space:
	// 0x00: syscall number; passed in %rax
	// 0x04: first argument; passed in %rdi
	// 0x0C: second argument; passed in %rsi
	// 0x14: third argument; passed in %rdx
	// 0x1C: fourth argument; passed in %r10
	// 0x24: fifth argument; passed in %r8
	// 0x2C: sixth argument; passed in %r9
	// 0x34: return value
	// 0x3C: RDTSCP result (%eax)
	// 0x40: RDTSCP result (%edx)
	// 0x44: RDTSCP result (%ecx)
	// 0x48: last system call (not used on x86-64)
	// 0x4C: number of consecutive calls to a time fnc; unused on x86-64
	// 0x50: nesting level of system calls (for debugging purposes only)
	// 0x54: signal mask
	// 0x5C: in SEGV handler

	// We use the %fs register for accessing the secure read-only page, and
	// the untrusted scratch space immediately following it. The segment
	// register and the local descriptor table is set up by passing
	// appropriate arguments to clone().

	0:xor %rsp, %rsp
	mov $2, %ebx // %rbx = initial sequence number

	// Read request from untrusted thread, or from trusted process. In
	// either case, the data that we read has to be considered untrusted.
	// read(threadFd, &scratch, 4)
	1:xor %rax, %rax // NR_read
	mov %r13, %rdi // fd = threadFd
	mov %fs:0x0, %rsi // secure_mem
	add $0x1000, %rsi // buf = &scratch
	mov $4, %edx // len = 4
	2:syscall
	cmp $-4, %rax // EINTR
	jz 2b
	cmp %rdx, %rax
	jnz fatal_error

	// Retrieve system call number. It is crucial that we only dereference
	// %fs:0x1000 exactly once. Afterwards, memory becomes untrusted and
	// we must use the value that we have read the first time.
	mov 0(%rsi), %eax

	// If syscall number is -1, execute an unlocked system call from the
	// secure memory area
	cmp $-1, %eax
	jnz 5f
	3:cmp %rbx, %fs:0x8
	jne fatal_error
	cmp %fs:0x10, %eax
	jne fatal_error
	mov %fs:0x18, %eax
	mov %fs:0x20, %rdi
	mov %fs:0x28, %rsi
	mov %fs:0x30, %rdx
	mov %fs:0x38, %r10
	mov %fs:0x40, %r8
	mov %fs:0x48, %r9
	cmp %rbx, %fs:0x8
	jne fatal_error
	add $2, %rbx

	// clone() has unusual calling conventions and must be handled
	// specially
	cmp $__NR_clone, %rax
	jz 19f

	// shmget() gets some special treatment. Whenever we return from this
	// system call, we remember the most recently returned SysV shm id.
	cmp $__NR_shmget, %eax
	jnz 4f
	syscall
	mov %rax, %r8
	mov $__NR_clone, %eax
	mov $17, %edi // flags = SIGCHLD
	mov $1, %esi // stack = 1
	syscall
	test %rax, %rax
	js fatal_error
	mov %rax, %rdi
	jnz 8f // wait for child, then return result
	mov %fs:0x0, %rdi // start = secure_mem
	mov $4096, %esi // len = 4096
	mov $3, %edx // prot = PROT_READ | PROT_WRITE
	mov $__NR_mprotect, %eax
	syscall
	CHECK_SYSCALL_ZERO
	mov %r8d, 0xDC(%rdi) // set most recently returned SysV shm id
	xor %rdi, %rdi

	// When debugging messages are enabled, warn about expensive system
	// calls
	#ifndef NDEBUG
	cmpw $0, %fs:0xD8 // debug mode
	jz 27f
	mov $__NR_write, %eax
	mov $2, %edi // fd = stderr
	lea 101f(%rip), %rsi // "This is an expensive system call"
	mov $102f-101f, %edx // len = strlen(msg)
	syscall
	xor %rdi, %rdi
	#endif

	jmp 27f // exit program, no message
	4:syscall
	jmp 15f // return result

	// If syscall number is -2, execute locked system call from the
	// secure memory area
	5:jg 12f
	cmp $-2, %eax
	jnz 9f
	cmp %rbx, %fs:0x8
	jne fatal_error
	cmp %eax, %fs:0x10
	jne fatal_error

	// When debugging messages are enabled, warn about expensive system
	// calls
	#ifndef NDEBUG
	cmpw $0, %fs:0xD8 // debug mode
	jz 6f
	mov $__NR_write, %eax
	mov $2, %edi // fd = stderr
	lea 101f(%rip), %rsi // "This is an expensive system call"
	mov $102f-101f, %edx // len = strlen(msg)
	syscall
	6:
	#endif

	mov %fs:0x18, %eax
	mov %fs:0x20, %rdi
	mov %fs:0x28, %rsi
	mov %fs:0x30, %rdx
	mov %fs:0x38, %r10
	mov %fs:0x40, %r8
	mov %fs:0x48, %r9
	cmp %rbx, %fs:0x8
	jne fatal_error

	// exit() terminates trusted thread
	cmp $__NR_exit, %eax
	jz 18f

	// Perform requested system call
	syscall

	// Unlock mutex
	7:cmp %rbx, %fs:0x8
	jne fatal_error
	mov %fs:0, %r12
	add $2, %rbx
	mov %rax, %r8
	mov $__NR_clone, %eax
	mov $17, %rdi // flags = SIGCHLD
	mov $1, %rsi // stack = 1
	syscall
	test %rax, %rax
	js fatal_error
	jz 22f // unlock and exit
	mov %rax, %rdi
	8:xor %rsi, %rsi
	xor %rdx, %rdx
	xor %r10, %r10
	mov $__NR_wait4, %eax
	syscall
	cmp $-4, %eax // EINTR
	jz 8b
	mov %r8, %rax
	jmp 15f // return result

	// If syscall number is -3, read the time stamp counter
	9:cmp $-3, %eax
	jnz 10f
	rdtsc // sets %edx:%eax
	xor %rcx, %rcx
	jmp 11f
	10:cmp $-4, %eax
	jnz 12f
	rdtscp // sets %edx:%eax and %ecx
	11:add $0x3C, %rsi
	mov %eax, 0(%rsi)
	mov %edx, 4(%rsi)
	mov %ecx, 8(%rsi)
	mov $12, %edx
	jmp 16f // return result

	// Check in syscallTable whether this system call is unrestricted
	12:mov %rax, %r9
	#ifndef NDEBUG
	cmpw $0, %fs:0xD8 // debug mode
	jnz 13f
	#endif
	cmp %fs:0xFC, %eax // maxSyscall
	ja fatal_error
	shl $4, %rax
	mov %fs:0x100, %rdi // syscallTable
	add %rdi, %rax
	mov 0(%rax), %rax
	cmp $1, %rax
	jne fatal_error

	// Default behavior for unrestricted system calls is to just execute
	// them. Read the remaining arguments first.
	13:mov %rsi, %r8
	xor %rax, %rax // NR_read
	mov %r13, %rdi // fd = threadFd
	add $4, %rsi // buf = &scratch + 4
	mov $48, %edx // len = 6*sizeof(void *)
	14:syscall
	cmp $-4, %rax // EINTR
	jz 14b
	cmp %rdx, %rax
	jnz fatal_error
	mov %r9, %rax
	mov 0x04(%r8), %rdi
	mov 0x0C(%r8), %rsi
	mov 0x14(%r8), %rdx
	mov 0x1C(%r8), %r10
	mov 0x2C(%r8), %r9
	mov 0x24(%r8), %r8
	cmp $__NR_exit_group, %rax
	jz 27f // exit program, no message
	syscall

	// Return result of system call to sandboxed thread
	15:mov %fs:0x0, %rsi // secure_mem
	add $0x1034, %rsi // buf = &scratch + 52
	mov %rax, (%rsi)
	mov $8, %edx // len = 8
	16:mov %r13, %rdi // fd = threadFd
	mov $__NR_write, %eax
	17:syscall
	cmp %rdx, %rax
	jz 1b
	cmp $-4, %rax // EINTR
	jz 17b
	jmp fatal_error

	// NR_exit:
	// Exit trusted thread after cleaning up resources
	18:mov %fs:0x0, %r12 // secure_mem
	mov 0xF0(%r12), %rdi // fd = threadFdPub
	mov $__NR_close, %eax
	syscall
	CHECK_SYSCALL_ZERO
	mov %r12, %rdi // start = secure_mem
	mov $8192, %esi // length = 8192
	xor %rdx, %rdx // prot = PROT_NONE
	mov $__NR_mprotect, %eax
	syscall
	CHECK_SYSCALL_ZERO
	mov %r13, %rdi // fd = threadFd
	mov $__NR_close, %eax
	syscall
	CHECK_SYSCALL_ZERO
	mov $__NR_clone, %eax
	mov $17, %rdi // flags = SIGCHLD
	mov $1, %rsi // stack = 1
	syscall
	mov %rax, %rdi
	test %rax, %rax
	js 27f // exit process
	jne 21f // reap helper, exit thread
	jmp 22f // unlock mutex

	// NR_clone:
	// Original trusted thread calls clone() to create new nascent
	// thread. This thread is (typically) fully privileged and shares all
	// resources with the caller (i.e. the previous trusted thread),
	// and by extension it shares all resources with the sandbox'd
	// threads.
	19:mov %fs:0x0, %rbp // %rbp = old_shared_mem
	mov %rsi, %r15 // remember child stack
	mov $1, %rsi // stack = 1
	syscall // calls NR_clone
	cmp $-4095, %rax // return codes -1..-4095 are errno values
	jae 7b // unlock mutex, return result
	test %rax, %rax
	jne 15b // return result

	// In nascent thread, now.
	// Undo sequence number increase that was made for the general case.
	sub $2, %rbx

	// We want to maintain an invalid %rsp whenever we access untrusted
	// memory. This ensures that even if an attacker can trick us into
	// triggering a SIGSEGV, we will never successfully execute a signal
	// handler.
	// Signal handlers are inherently dangerous, as an attacker could trick
	// us into returning to the wrong address by adjusting the signal stack
	// right before the handler returns.
	// N.B. While POSIX is curiously silent about this, it appears that on
	// Linux, alternate signal stacks are a per-thread property. That is
	// good. It means that this security mechanism works, even if the
	// sandboxed thread manages to set up an alternate signal stack.
	//
	// TODO(markus): We currently do not support emulating calls to
	// sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc
	// for a discussion on how to fix this, if this ever becomes necessary.
	mov %r15, %r9 // %r9 = child_stack
	xor %r15, %r15 // Request to return from clone() when done

	// Get thread id of nascent thread
	20:mov $__NR_gettid, %eax
	syscall
	mov %rax, %r14

	// Nascent thread creates socketpair() for sending requests to
	// trusted thread.
	// We can create the filehandles on the child's stack. Filehandles are
	// always treated as untrusted.
	// socketpair(AF_UNIX, SOCK_STREAM, 0, fds)
	sub $0x10, %r9
	mov %r15, 8(%r9) // preserve return address on child stack
	mov $__NR_socketpair, %eax
	mov $1, %edi // domain = AF_UNIX
	mov $1, %esi // type = SOCK_STREAM
	xor %rdx, %rdx // protocol = 0
	mov %r9, %r10 // sv = child_stack
	syscall
	test %rax, %rax
	jz 28f

	// If things went wrong, we don't have an (easy) way of signaling
	// the parent. For our purposes, it is sufficient to fail with a
	// fatal error.
	jmp fatal_error
	21:xor %rsi, %rsi
	xor %rdx, %rdx
	xor %r10, %r10
	mov $__NR_wait4, %eax
	syscall
	cmp $-4, %eax // EINTR
	jz 21b
	jmp 23f // exit thread (no message)
	// Unlock syscallMutex and exit.
	// On entry %r12 = secureMem. We cannot use %fs:0 in the case where
	// the page has been mprotect()'d to PROT_NONE.
	22:mov %r12, %rdi
	mov $4096, %esi
	mov $3, %edx // prot = PROT_READ | PROT_WRITE
	mov $__NR_mprotect, %eax
	syscall
	CHECK_SYSCALL_ZERO
	add $0xF8, %rdi
	lock; addl $0x80000000, (%rdi)
	jz 23f // exit thread
	mov $1, %edx
	mov %rdx, %rsi // FUTEX_WAKE
	mov $__NR_futex, %eax
	syscall
	23:mov $__NR_exit, %eax
	mov $1, %edi // status = 1
	24:syscall
	fatal_error:
	mov $__NR_write, %eax
	mov $2, %edi // fd = stderr
	lea 100f(%rip), %rsi // "Sandbox violation detected"
	mov $101f-100f, %edx // len = strlen(msg)
	syscall
	26:mov $1, %edi
	27:mov $__NR_exit_group, %eax
	jmp 24b

	// The first page is mapped read-only for use as securely shared memory
	28:mov 0xC8(%rbp), %r12 // %r12 = secure shared memory
	cmp %rbx, 8(%rbp)
	jne fatal_error
	mov $__NR_mprotect, %eax
	mov %r12, %rdi // addr = secure_mem
	mov $4096, %esi // len = 4096
	mov $1, %edx // prot = PROT_READ
	syscall
	CHECK_SYSCALL_ZERO

	// The second page is used as scratch space by the trusted thread.
	// Make it writable.
	mov $__NR_mprotect, %eax
	add $4096, %rdi // addr = secure_mem + 4096
	mov $3, %edx // prot = PROT_READ | PROT_WRITE
	syscall
	CHECK_SYSCALL_ZERO

	// Call clone() to create new trusted thread().
	// clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
	// CLONE_SYSVSEM|CLONE_UNTRACED|CLONE_SETTLS, stack, NULL, NULL,
	// tls)
	mov 4(%r9), %r13d // %r13 = threadFd (on child's stack)
	mov $__NR_clone, %eax
	mov $0x8D0F00, %edi // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR|TLS
	mov $1, %rsi // stack = 1
	mov %r12, %r8 // tls = new_secure_mem
	cmp %rbx, 8(%rbp)
	jne fatal_error
	syscall
	test %rax, %rax
	js fatal_error
	jz 0b // invoke trustedThreadFnc()

	// Copy the caller's signal mask
	mov 0x1054(%rbp), %rax
	mov %rax, 0x1054(%r12)

	// Done creating trusted thread. We can get ready to return to caller
	mov %r9, %r8 // %r8 = child_stack
	mov 0(%r9), %r9d // %r9 = threadFdPub

	// Set up thread local storage with information on how to talk to
	// trusted thread and trusted process.
	lea 0xE0(%r12), %rsi // args = &secure_mem.TLS;
	mov $__NR_arch_prctl, %eax
	mov $0x1001, %edi // option = ARCH_SET_GS
	syscall
	cmp $-4095, %rax // return codes -1..-4095 are errno values
	jae fatal_error

	add $0x10, %r8

	// Check the sequence number
	cmp %rbx, 8(%rbp)
	jne fatal_error

	// Nascent thread launches a helper that doesn't share any of our
	// resources, except for pages mapped as MAP_SHARED.
	// clone(SIGCHLD, stack=1)
	mov $__NR_clone, %eax
	mov $17, %rdi // flags = SIGCHLD
	mov $1, %rsi // stack = 1
	syscall
	test %rax, %rax
	js fatal_error
	jne 31f

	// Use sendmsg() to send to the trusted process the file handles for
	// communicating with the new trusted thread. We also send the address
	// of the secure memory area (for sanity checks) and the thread id.
	// transport = Sandbox::cloneFdPub()
	mov playground$cloneFdPub(%rip), %edi
	cmp %rbx, 8(%rbp)
	jne fatal_error

	// 0x00 msg:
	// 0x00 msg_name ($0)
	// 0x08 msg_namelen ($0)
	// 0x10 msg_iov (%r8 + 0x44)
	// 0x18 msg_iovlen ($1)
	// 0x20 msg_control (%r8 + 0x54)
	// 0x28 msg_controllen ($0x18)
	// 0x30 data:
	// 0x30 msg_flags/err ($0)
	// 0x34 secure_mem (%r12)
	// 0x3C threadId (%r14d)
	// 0x40 threadFdPub (%r9d)
	// 0x44 iov:
	// 0x44 iov_base (%r8 + 0x30)
	// 0x4C iov_len ($0x14)
	// 0x54 cmsg:
	// 0x54 cmsg_len ($0x18)
	// 0x5C cmsg_level ($1, SOL_SOCKET)
	// 0x60 cmsg_type ($1, SCM_RIGHTS)
	// 0x64 threadFdPub (%r9d)
	// 0x68 threadFd (%r13d)
	// 0x6C
	lea sendmsg_data(%rip), %r8
	xor %rdx, %rdx // flags = 0
	mov %rdx, 0x00(%r8) // msg_name
	mov %edx, 0x08(%r8) // msg_namelen
	mov %edx, 0x30(%r8) // msg_flags
	mov $1, %r11d
	mov %r11, 0x18(%r8) // msg_iovlen
	mov %r11d, 0x5C(%r8) // cmsg_level
	mov %r11d, 0x60(%r8) // cmsg_type
	lea 0x30(%r8), %r11
	mov %r11, 0x44(%r8) // iov_base
	add $0x14, %r11
	mov %r11, 0x10(%r8) // msg_iov
	add $0x10, %r11
	mov %r11, 0x20(%r8) // msg_control
	mov $0x14, %r11d
	mov %r11, 0x4C(%r8) // iov_len
	add $4, %r11d
	mov %r11, 0x28(%r8) // msg_controllen
	mov %r11, 0x54(%r8) // cmsg_len
	mov %r12, 0x34(%r8) // secure_mem
	mov %r14d, 0x3C(%r8) // threadId
	mov %r9d, 0x40(%r8) // threadFdPub
	mov %r9d, 0x64(%r8) // threadFdPub
	mov %r13d, 0x68(%r8) // threadFd
	mov $__NR_sendmsg, %eax
	mov %r8, %rsi // msg
	syscall
	30:xor %rdi, %rdi
	jmp 27b // exit process (no error message)

	// Reap helper
	31:mov %rax, %rdi
	32:lea -4(%r8), %rsi
	xor %rdx, %rdx
	xor %r10, %r10
	mov $__NR_wait4, %eax
	syscall
	cmp $-4, %eax // EINTR
	jz 32b
	mov -4(%r8), %eax
	test %rax, %rax
	jnz 26b // exit process (no error message)

	// Release privileges by entering seccomp mode.
	mov $__NR_prctl, %eax
	mov $22, %edi // PR_SET_SECCOMP
	mov $1, %esi
	syscall
	CHECK_SYSCALL_ZERO

	// We can finally start using the stack. Signal handlers no longer pose
	// a threat to us.
	mov %r8, %rsp

	// Back in the newly created sandboxed thread, wait for trusted process
	// to receive request. It is possible for an attacker to make us
	// continue even before the trusted process is done. This is OK. It'll
	// result in us putting stale values into the new thread's TLS. But
	// that data is considered untrusted anyway.
	push %rax
	mov $1, %edx // len = 1
	mov %rsp, %rsi // buf = %rsp
	mov %r9, %rdi // fd = threadFdPub
	33:xor %rax, %rax // NR_read
	syscall
	cmp $-4, %rax // EINTR
	jz 33b
	cmp %rdx, %rax
	jne fatal_error
	pop %rax

	// Returning to the place where clone() had been called. We rely on
	// using rt_sigreturn() for restoring our registers. The caller already
	// created a signal stack frame and patched the register values
	// with the ones that were in effect prior to calling sandbox_clone().
	mov $__NR_rt_sigreturn, %eax
	syscall

	.pushsection ".rodata"
	100:.ascii "Sandbox violation detected, program aborted\n"
	101:.ascii "WARNING! This is an expensive system call\n"
	102:
	.popsection

	999:pop %rbp
	pop %rbx
	ret


	.bss
	// Scratch buffer in which the fork()'d helper assembles its sendmsg()
	// request: the msghdr at 0x00, the payload at 0x30, the iovec at 0x44
	// and the SCM_RIGHTS control message at 0x54 -- 0x6C bytes in total.
	// Because it is only used inside that helper process, the buffer
	// could in theory share storage with (and clobber) other data, but it
	// is too small for that to be worth the trouble. The one hard
	// requirement is that it lives in a MAP_PRIVATE mapping; otherwise an
	// untrusted thread could modify the forked subprocess's copy.
	sendmsg_data:
	.skip 0x6C