| // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <asm/unistd.h> |
| |
| |
| // CHECK_SYSCALL_ZERO: place directly after a "syscall" instruction. |
| // Branches to fatal_error unless the value returned in %rax is zero. |
| // Modifies the flags; no register is written. |
| #define CHECK_SYSCALL_ZERO test %rax, %rax; jne fatal_error |
| |
| |
| // playground$runTrustedThread |
| // |
| // In: %rdi = args (kept in %rbp; read below at offsets 0x8, 0xC8, 0x1054) |
| // |
| // The caller's %rbx and %rbp are pushed on entry and popped at label 999. |
| // The code blocks all signals, invalidates %rsp and then clone()s a |
| // trusted thread. The calling thread enters seccomp mode and returns via |
| // rt_sigreturn() (resuming at label 999); the cloned thread starts at |
| // label 0 and serves requests read from threadFd (%r13) in a loop. |
| // |
| // Roadmap of the numeric labels: |
| // 0: first instruction of a freshly cloned trusted thread |
| // 1: top of request loop, reads the 4 byte system call number |
| // 3: call type -1, unlocked system call taken from secure memory |
| // 5: dispatch for all system call numbers other than -1 |
| // 7, 8: fork helper that unlocks syscallMutex, reap it, return result |
| // 9-11: call types -3 / -4, RDTSC and RDTSCP |
| // 12, 13: syscallTable check, read arguments, execute system call |
| // 15-17: write result back to the sandboxed thread |
| // 18: NR_exit, 19: NR_clone, 20 and 28: create a new trusted thread |
| // 21-24: reap helper / unlock syscallMutex / exit thread |
| // 26, 27: exit_group() |
| // 31-33: reap sendmsg() helper, enter seccomp mode, wait, sigreturn |
| // 999: return to the caller of playground$runTrustedThread |
| // |
| // Every EINTR retry loop reloads the system call number before it |
| // re-executes "syscall", because "syscall" overwrites %rax with its |
| // return value. |
| .internal playground$runTrustedThread |
| .global playground$runTrustedThread |
| playground$runTrustedThread: |
| push %rbx // popped again at label 999 |
| push %rbp // popped again at label 999 |
| mov %rdi, %rbp // %rbp = args |
| xor %rbx, %rbx // initial sequence number |
| lea 999f(%rip), %r15 // continue in same thread |
| |
| // Signal handlers are process-wide. This means that for security |
| // reasons, we cannot allow that the trusted thread ever executes any |
| // signal handlers. |
| // We prevent the execution of signal handlers by setting a signal |
| // mask that blocks all signals. In addition, we make sure that the |
| // stack pointer is invalid. |
| // We cannot reset the signal mask until after we have enabled |
| // Seccomp mode. Our sigprocmask() wrapper would normally do this by |
| // raising a signal, modifying the signal mask in the kernel-generated |
| // signal frame, and then calling sigreturn(). This presents a bit of |
| // a Catch-22, as all signals are masked and we can therefore not |
| // raise any signal that would allow us to generate the signal stack |
| // frame. |
| // Instead, we have to create the signal stack frame prior to entering |
| // Seccomp mode. This incidentally also helps us to restore the |
| // signal mask to the same value that it had prior to entering the |
| // sandbox. |
| // The signal wrapper for clone() is the second entry point into this |
| // code (by means of sending an IPC to its trusted thread). It goes |
| // through the same steps of creating a signal stack frame on the |
| // newly created thread's stacks prior to cloning. See clone.cc for |
| // details. |
| mov $__NR_clone + 0xF000, %eax |
| mov %rsp, %rcx // stack pointer from before the frame is pushed |
| int $0 // push a signal stack frame (see clone.cc) |
| mov %rcx, 0xA0(%rsp) // pop stack upon call to sigreturn() |
| mov %r15, 0xA8(%rsp) // return address: continue in same thread |
| mov %rsp, %r9 // %r9 = signal frame; used like a child stack at 20 |
| mov $2, %rdi // how = SIG_SETMASK |
| pushq $-1 // 64 bit mask with every bit set |
| mov %rsp, %rsi // set = full mask |
| xor %rdx, %rdx // old_set = NULL |
| mov $8, %r10 // mask all 64 signals |
| mov $__NR_rt_sigprocmask, %eax |
| syscall |
| CHECK_SYSCALL_ZERO |
| xor %rsp, %rsp // invalidate the stack in all trusted code |
| jmp 20f // create trusted thread |
| |
| // TODO(markus): Coalesce the read() operations by reading into a |
| // bigger buffer. |
| |
| // Parameters: |
| // *%fs: secure memory region |
| // the page following this one contains the scratch space |
| // %r13: thread's side of threadFd |
| |
| // Local variables: |
| // %rbx: sequence number for trusted calls |
| |
| // Temporary variables: |
| // %r8: child stack |
| // %r9: system call number, child stack |
| // %rbp: secure memory of previous thread |
| |
| // Layout of secure shared memory region (c.f. securemem.h): |
| // 0x00: pointer to the secure shared memory region (i.e. self) |
| // 0x08: sequence number; must match %rbx |
| // 0x10: call type; must match %eax, iff %eax == -1 || %eax == -2 |
| // 0x18: system call number; passed to syscall in %rax |
| // 0x20: first argument; passed to syscall in %rdi |
| // 0x28: second argument; passed to syscall in %rsi |
| // 0x30: third argument; passed to syscall in %rdx |
| // 0x38: fourth argument; passed to syscall in %r10 |
| // 0x40: fifth argument; passed to syscall in %r8 |
| // 0x48: sixth argument; passed to syscall in %r9 |
| // 0x50-0xC0: no longer used |
| // 0xC8: new shared memory for clone() |
| // 0xD0: no longer used |
| // 0xD4: no longer used |
| // 0xD8: set to non-zero, if in debugging mode |
| // 0xDC: most recent SHM id returned by shmget(IPC_PRIVATE) |
| // 0xE0: cookie assigned to us by the trusted process (TLS_COOKIE) |
| // 0xE8: thread id (TLS_TID) |
| // 0xF0: threadFdPub (TLS_THREAD_FD) |
| // 0xF8: syscallMutex |
| // 0xFC: maxSyscall |
| // 0x100: syscallTable |
| // 0x200-0x1000: securely passed verified file name(s) |
| |
| // Layout of (untrusted) scratch space: |
| // 0x00: syscall number; passed in %rax |
| // 0x04: first argument; passed in %rdi |
| // 0x0C: second argument; passed in %rsi |
| // 0x14: third argument; passed in %rdx |
| // 0x1C: fourth argument; passed in %r10 |
| // 0x24: fifth argument; passed in %r8 |
| // 0x2C: sixth argument; passed in %r9 |
| // 0x34: return value |
| // 0x3C: RDTSCP result (%eax) |
| // 0x40: RDTSCP result (%edx) |
| // 0x44: RDTSCP result (%ecx) |
| // 0x48: last system call (not used on x86-64) |
| // 0x4C: number of consecutive calls to a time fnc; unused on x86-64 |
| // 0x50: nesting level of system calls (for debugging purposes only) |
| // 0x54: signal mask |
| // 0x5C: in SEGV handler |
| |
| // We use the %fs register for accessing the secure read-only page, and |
| // the untrusted scratch space immediately following it. The segment |
| // register and the local descriptor table is set up by passing |
| // appropriate arguments to clone(). |
| |
| 0:xor %rsp, %rsp // trusted thread never runs with a valid stack |
| mov $2, %ebx // %rbx = initial sequence number |
| |
| // Read request from untrusted thread, or from trusted process. In |
| // either case, the data that we read has to be considered untrusted. |
| // read(threadFd, &scratch, 4) |
| 1:mov %r13, %rdi // fd = threadFd |
| mov %fs:0x0, %rsi // secure_mem |
| add $0x1000, %rsi // buf = &scratch |
| mov $4, %edx // len = 4 |
| 2:xor %rax, %rax // NR_read; reloaded on each try, %rax was -EINTR |
| syscall |
| cmp $-4, %rax // EINTR |
| jz 2b |
| cmp %rdx, %rax // anything but a complete read is fatal |
| jnz fatal_error |
| |
| // Retrieve system call number. It is crucial that we only dereference |
| // %fs:0x1000 exactly once. Afterwards, memory becomes untrusted and |
| // we must use the value that we have read the first time. |
| mov 0(%rsi), %eax |
| |
| // If syscall number is -1, execute an unlocked system call from the |
| // secure memory area |
| cmp $-1, %eax |
| jnz 5f // N.B. label 5 still consumes these flags |
| 3:cmp %rbx, %fs:0x8 // sequence number must match |
| jne fatal_error |
| cmp %fs:0x10, %eax // call type must match |
| jne fatal_error |
| mov %fs:0x18, %eax |
| mov %fs:0x20, %rdi |
| mov %fs:0x28, %rsi |
| mov %fs:0x30, %rdx |
| mov %fs:0x38, %r10 |
| mov %fs:0x40, %r8 |
| mov %fs:0x48, %r9 |
| cmp %rbx, %fs:0x8 // re-check sequence number after loading |
| jne fatal_error |
| add $2, %rbx |
| |
| // clone() has unusual calling conventions and must be handled |
| // specially |
| cmp $__NR_clone, %rax |
| jz 19f |
| |
| // shmget() gets some special treatment. Whenever we return from this |
| // system call, we remember the most recently returned SysV shm id. |
| cmp $__NR_shmget, %eax |
| jnz 4f |
| syscall |
| mov %rax, %r8 // %r8 = result of shmget() |
| mov $__NR_clone, %eax |
| mov $17, %edi // flags = SIGCHLD |
| mov $1, %esi // stack = 1 |
| syscall |
| test %rax, %rax |
| js fatal_error |
| mov %rax, %rdi // mov leaves the flags of the test intact |
| jnz 8f // wait for child, then return result |
| mov %fs:0x0, %rdi // start = secure_mem |
| mov $4096, %esi // len = 4096 |
| mov $3, %edx // prot = PROT_READ | PROT_WRITE |
| mov $__NR_mprotect, %eax |
| syscall |
| CHECK_SYSCALL_ZERO |
| mov %r8d, 0xDC(%rdi) // set most recently returned SysV shm id |
| xor %rdi, %rdi // exit status = 0 |
| |
| // When debugging messages are enabled, warn about expensive system |
| // calls |
| #ifndef NDEBUG |
| cmpw $0, %fs:0xD8 // debug mode |
| jz 27f |
| mov $__NR_write, %eax |
| mov $2, %edi // fd = stderr |
| lea 101f(%rip), %rsi // "This is an expensive system call" |
| mov $102f-101f, %edx // len = strlen(msg) |
| syscall |
| xor %rdi, %rdi // exit status = 0 |
| #endif |
| |
| jmp 27f // exit program, no message |
| 4:syscall |
| jmp 15f // return result |
| |
| // If syscall number is -2, execute locked system call from the |
| // secure memory area |
| 5:jg 12f // flags from "cmp $-1, %eax": number is >= 0 |
| cmp $-2, %eax |
| jnz 9f |
| cmp %rbx, %fs:0x8 // sequence number must match |
| jne fatal_error |
| cmp %eax, %fs:0x10 // call type must match |
| jne fatal_error |
| |
| // When debugging messages are enabled, warn about expensive system |
| // calls |
| #ifndef NDEBUG |
| cmpw $0, %fs:0xD8 // debug mode |
| jz 6f |
| mov $__NR_write, %eax |
| mov $2, %edi // fd = stderr |
| lea 101f(%rip), %rsi // "This is an expensive system call" |
| mov $102f-101f, %edx // len = strlen(msg) |
| syscall |
| 6: |
| #endif |
| |
| mov %fs:0x18, %eax |
| mov %fs:0x20, %rdi |
| mov %fs:0x28, %rsi |
| mov %fs:0x30, %rdx |
| mov %fs:0x38, %r10 |
| mov %fs:0x40, %r8 |
| mov %fs:0x48, %r9 |
| cmp %rbx, %fs:0x8 // re-check sequence number after loading |
| jne fatal_error |
| |
| // exit() terminates trusted thread |
| cmp $__NR_exit, %eax |
| jz 18f |
| |
| // Perform requested system call |
| syscall |
| |
| // Unlock mutex |
| 7:cmp %rbx, %fs:0x8 |
| jne fatal_error |
| mov %fs:0, %r12 // %r12 = secure_mem, as expected by label 22 |
| add $2, %rbx |
| mov %rax, %r8 // %r8 = result of the system call |
| mov $__NR_clone, %eax |
| mov $17, %rdi // flags = SIGCHLD |
| mov $1, %rsi // stack = 1 |
| syscall |
| test %rax, %rax |
| js fatal_error |
| jz 22f // unlock and exit |
| mov %rax, %rdi // pid of helper |
| 8:xor %rsi, %rsi |
| xor %rdx, %rdx |
| xor %r10, %r10 |
| mov $__NR_wait4, %eax |
| syscall |
| cmp $-4, %eax // EINTR |
| jz 8b |
| mov %r8, %rax // result that was saved before forking the helper |
| jmp 15f // return result |
| |
| // If syscall number is -3, read the time stamp counter |
| 9:cmp $-3, %eax |
| jnz 10f |
| rdtsc // sets %edx:%eax |
| xor %rcx, %rcx |
| jmp 11f |
| 10:cmp $-4, %eax |
| jnz 12f |
| rdtscp // sets %edx:%eax and %ecx |
| 11:add $0x3C, %rsi // %rsi = &scratch + 0x3C (RDTSCP result) |
| mov %eax, 0(%rsi) |
| mov %edx, 4(%rsi) |
| mov %ecx, 8(%rsi) |
| mov $12, %edx // len = 12 |
| jmp 16f // return result |
| |
| // Check in syscallTable whether this system call is unrestricted |
| 12:mov %rax, %r9 // %r9 = system call number |
| #ifndef NDEBUG |
| cmpw $0, %fs:0xD8 // debug mode |
| jnz 13f |
| #endif |
| cmp %fs:0xFC, %eax // maxSyscall |
| ja fatal_error |
| shl $4, %rax // table entries are indexed in 16 byte steps |
| mov %fs:0x100, %rdi // syscallTable |
| add %rdi, %rax |
| mov 0(%rax), %rax |
| cmp $1, %rax |
| jne fatal_error |
| |
| // Default behavior for unrestricted system calls is to just execute |
| // them. Read the remaining arguments first. |
| 13:mov %rsi, %r8 // %r8 = &scratch |
| mov %r13, %rdi // fd = threadFd |
| add $4, %rsi // buf = &scratch + 4 |
| mov $48, %edx // len = 6*sizeof(void *) |
| 14:xor %rax, %rax // NR_read; reloaded on each try, %rax was -EINTR |
| syscall |
| cmp $-4, %rax // EINTR |
| jz 14b |
| cmp %rdx, %rax // anything but a complete read is fatal |
| jnz fatal_error |
| mov %r9, %rax |
| mov 0x04(%r8), %rdi |
| mov 0x0C(%r8), %rsi |
| mov 0x14(%r8), %rdx |
| mov 0x1C(%r8), %r10 |
| mov 0x2C(%r8), %r9 |
| mov 0x24(%r8), %r8 // last, as this overwrites the base register |
| cmp $__NR_exit_group, %rax |
| jz 27f // exit program, no message |
| syscall |
| |
| // Return result of system call to sandboxed thread |
| 15:mov %fs:0x0, %rsi // secure_mem |
| add $0x1034, %rsi // buf = &scratch + 52 |
| mov %rax, (%rsi) |
| mov $8, %edx // len = 8 |
| 16:mov %r13, %rdi // fd = threadFd |
| 17:mov $__NR_write, %eax // reloaded on each try, %rax was -EINTR |
| syscall |
| cmp %rdx, %rax |
| jz 1b // complete write: wait for the next request |
| cmp $-4, %rax // EINTR |
| jz 17b |
| jmp fatal_error |
| |
| // NR_exit: |
| // Exit trusted thread after cleaning up resources |
| 18:mov %fs:0x0, %r12 // secure_mem |
| mov 0xF0(%r12), %rdi // fd = threadFdPub |
| mov $__NR_close, %eax |
| syscall |
| CHECK_SYSCALL_ZERO |
| mov %r12, %rdi // start = secure_mem |
| mov $8192, %esi // length = 8192 |
| xor %rdx, %rdx // prot = PROT_NONE |
| mov $__NR_mprotect, %eax |
| syscall |
| CHECK_SYSCALL_ZERO |
| mov %r13, %rdi // fd = threadFd |
| mov $__NR_close, %eax |
| syscall |
| CHECK_SYSCALL_ZERO |
| mov $__NR_clone, %eax |
| mov $17, %rdi // flags = SIGCHLD |
| mov $1, %rsi // stack = 1 |
| syscall |
| mov %rax, %rdi |
| test %rax, %rax |
| js 27f // exit process |
| jne 21f // reap helper, exit thread |
| jmp 22f // unlock mutex |
| |
| // NR_clone: |
| // Original trusted thread calls clone() to create new nascent |
| // thread. This thread is (typically) fully privileged and shares all |
| // resources with the caller (i.e. the previous trusted thread), |
| // and by extension it shares all resources with the sandbox'd |
| // threads. |
| 19:mov %fs:0x0, %rbp // %rbp = old_shared_mem |
| mov %rsi, %r15 // remember child stack |
| mov $1, %rsi // stack = 1 |
| syscall // calls NR_clone |
| cmp $-4095, %rax // return codes -1..-4095 are errno values |
| jae 7b // unlock mutex, return result |
| test %rax, %rax |
| jne 15b // return result |
| |
| // In nascent thread, now. |
| // Undo sequence number increase that was made for the general case. |
| sub $2, %rbx |
| |
| // We want to maintain an invalid %rsp whenever we access untrusted |
| // memory. This ensures that even if an attacker can trick us into |
| // triggering a SIGSEGV, we will never successfully execute a signal |
| // handler. |
| // Signal handlers are inherently dangerous, as an attacker could trick |
| // us into returning to the wrong address by adjusting the signal stack |
| // right before the handler returns. |
| // N.B. While POSIX is curiously silent about this, it appears that on |
| // Linux, alternate signal stacks are a per-thread property. That is |
| // good. It means that this security mechanism works, even if the |
| // sandboxed thread manages to set up an alternate signal stack. |
| // |
| // TODO(markus): We currently do not support emulating calls to |
| // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc |
| // for a discussion on how to fix this, if this ever becomes necessary |
| mov %r15, %r9 // %r9 = child_stack |
| xor %r15, %r15 // Request to return from clone() when done |
| |
| // Get thread id of nascent thread |
| 20:mov $__NR_gettid, %eax |
| syscall |
| mov %rax, %r14 // %r14 = thread id |
| |
| // Nascent thread creates socketpair() for sending requests to |
| // trusted thread. |
| // We can create the filehandles on the child's stack. Filehandles are |
| // always treated as untrusted. |
| // socketpair(AF_UNIX, SOCK_STREAM, 0, fds) |
| sub $0x10, %r9 |
| mov %r15, 8(%r9) // preserve return address on child stack |
| mov $__NR_socketpair, %eax |
| mov $1, %edi // domain = AF_UNIX |
| mov $1, %esi // type = SOCK_STREAM |
| xor %rdx, %rdx // protocol = 0 |
| mov %r9, %r10 // sv = child_stack |
| syscall |
| test %rax, %rax |
| jz 28f |
| |
| // If things went wrong, we don't have an (easy) way of signaling |
| // the parent. For our purposes, it is sufficient to fail with a |
| // fatal error. |
| jmp fatal_error |
| 21:xor %rsi, %rsi |
| xor %rdx, %rdx |
| xor %r10, %r10 |
| mov $__NR_wait4, %eax |
| syscall |
| cmp $-4, %eax // EINTR |
| jz 21b |
| jmp 23f // exit thread (no message) |
| // Unlock syscallMutex and exit. |
| // On entry %r12 = secureMem. We cannot use %fs:0 in the case where |
| // the page has been mprotect()'d to PROT_NONE. |
| 22:mov %r12, %rdi |
| mov $4096, %esi |
| mov $3, %edx // prot = PROT_READ | PROT_WRITE |
| mov $__NR_mprotect, %eax |
| syscall |
| CHECK_SYSCALL_ZERO |
| add $0xF8, %rdi // %rdi = &syscallMutex |
| lock; addl $0x80000000, (%rdi) |
| jz 23f // exit thread |
| mov $1, %edx |
| mov %rdx, %rsi // FUTEX_WAKE |
| mov $__NR_futex, %eax |
| syscall |
| 23:mov $__NR_exit, %eax |
| mov $1, %edi // status = 1 |
| 24:syscall |
| fatal_error: |
| mov $__NR_write, %eax |
| mov $2, %edi // fd = stderr |
| lea 100f(%rip), %rsi // "Sandbox violation detected" |
| mov $101f-100f, %edx // len = strlen(msg) |
| syscall |
| 26:mov $1, %edi // status = 1 |
| 27:mov $__NR_exit_group, %eax // status was set up in %rdi by the caller |
| jmp 24b |
| |
| // The first page is mapped read-only for use as securely shared memory |
| 28:mov 0xC8(%rbp), %r12 // %r12 = secure shared memory |
| cmp %rbx, 8(%rbp) |
| jne fatal_error |
| mov $__NR_mprotect, %eax |
| mov %r12, %rdi // addr = secure_mem |
| mov $4096, %esi // len = 4096 |
| mov $1, %edx // prot = PROT_READ |
| syscall |
| CHECK_SYSCALL_ZERO |
| |
| // The second page is used as scratch space by the trusted thread. |
| // Make it writable. |
| mov $__NR_mprotect, %eax |
| add $4096, %rdi // addr = secure_mem + 4096 |
| mov $3, %edx // prot = PROT_READ | PROT_WRITE |
| syscall |
| CHECK_SYSCALL_ZERO |
| |
| // Call clone() to create new trusted thread(). |
| // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| |
| // CLONE_SYSVSEM|CLONE_UNTRACED|CLONE_SETTLS, stack, NULL, NULL, |
| // tls) |
| mov 4(%r9), %r13d // %r13 = threadFd (on child's stack) |
| mov $__NR_clone, %eax |
| mov $0x8D0F00, %edi // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR|TLS |
| mov $1, %rsi // stack = 1 |
| mov %r12, %r8 // tls = new_secure_mem |
| cmp %rbx, 8(%rbp) |
| jne fatal_error |
| syscall |
| test %rax, %rax |
| js fatal_error |
| jz 0b // invoke trustedThreadFnc() |
| |
| // Copy the caller's signal mask |
| mov 0x1054(%rbp), %rax |
| mov %rax, 0x1054(%r12) |
| |
| // Done creating trusted thread. We can get ready to return to caller |
| mov %r9, %r8 // %r8 = child_stack |
| mov 0(%r9), %r9d // %r9 = threadFdPub |
| |
| // Set up thread local storage with information on how to talk to |
| // trusted thread and trusted process. |
| lea 0xE0(%r12), %rsi // args = &secure_mem.TLS; |
| mov $__NR_arch_prctl, %eax |
| mov $0x1001, %edi // option = ARCH_SET_GS |
| syscall |
| cmp $-4095, %rax // return codes -1..-4095 are errno values |
| jae fatal_error |
| |
| add $0x10, %r8 // undo the "sub $0x10" made at label 20 |
| |
| // Check the sequence number |
| cmp %rbx, 8(%rbp) |
| jne fatal_error |
| |
| // Nascent thread launches a helper that doesn't share any of our |
| // resources, except for pages mapped as MAP_SHARED. |
| // clone(SIGCHLD, stack=1) |
| mov $__NR_clone, %eax |
| mov $17, %rdi // flags = SIGCHLD |
| mov $1, %rsi // stack = 1 |
| syscall |
| test %rax, %rax |
| js fatal_error |
| jne 31f |
| |
| // Use sendmsg() to send to the trusted process the file handles for |
| // communicating with the new trusted thread. We also send the address |
| // of the secure memory area (for sanity checks) and the thread id. |
| // transport = Sandbox::cloneFdPub() |
| mov playground$cloneFdPub(%rip), %edi |
| cmp %rbx, 8(%rbp) |
| jne fatal_error |
| |
| // 0x00 msg: |
| // 0x00 msg_name ($0) |
| // 0x08 msg_namelen ($0) |
| // 0x10 msg_iov (%r8 + 0x44) |
| // 0x18 msg_iovlen ($1) |
| // 0x20 msg_control (%r8 + 0x54) |
| // 0x28 msg_controllen ($0x18) |
| // 0x30 data: |
| // 0x30 msg_flags/err ($0) |
| // 0x34 secure_mem (%r12) |
| // 0x3C threadId (%r14d) |
| // 0x40 threadFdPub (%r9d) |
| // 0x44 iov: |
| // 0x44 iov_base (%r8 + 0x30) |
| // 0x4C iov_len ($0x14) |
| // 0x54 cmsg: |
| // 0x54 cmsg_len ($0x18) |
| // 0x5C cmsg_level ($1, SOL_SOCKET) |
| // 0x60 cmsg_type ($1, SCM_RIGHTS) |
| // 0x64 threadFdPub (%r9d) |
| // 0x68 threadFd (%r13d) |
| // 0x6C |
| lea sendmsg_data(%rip), %r8 |
| xor %rdx, %rdx // flags = 0 |
| mov %rdx, 0x00(%r8) // msg_name |
| mov %edx, 0x08(%r8) // msg_namelen |
| mov %edx, 0x30(%r8) // msg_flags |
| mov $1, %r11d |
| mov %r11, 0x18(%r8) // msg_iovlen |
| mov %r11d, 0x5C(%r8) // cmsg_level |
| mov %r11d, 0x60(%r8) // cmsg_type |
| lea 0x30(%r8), %r11 // %r11 = &data |
| mov %r11, 0x44(%r8) // iov_base |
| add $0x14, %r11 // %r11 = &iov |
| mov %r11, 0x10(%r8) // msg_iov |
| add $0x10, %r11 // %r11 = &cmsg |
| mov %r11, 0x20(%r8) // msg_control |
| mov $0x14, %r11d |
| mov %r11, 0x4C(%r8) // iov_len |
| add $4, %r11d // %r11 = 0x18 |
| mov %r11, 0x28(%r8) // msg_controllen |
| mov %r11, 0x54(%r8) // cmsg_len |
| mov %r12, 0x34(%r8) // secure_mem |
| mov %r14d, 0x3C(%r8) // threadId |
| mov %r9d, 0x40(%r8) // threadFdPub |
| mov %r9d, 0x64(%r8) // threadFdPub |
| mov %r13d, 0x68(%r8) // threadFd |
| mov $__NR_sendmsg, %eax |
| mov %r8, %rsi // msg |
| syscall |
| 30:xor %rdi, %rdi // exit status = 0 |
| jmp 27b // exit process (no error message) |
| |
| // Reap helper |
| 31:mov %rax, %rdi // pid of helper |
| 32:lea -4(%r8), %rsi // status is stored just below the child stack |
| xor %rdx, %rdx |
| xor %r10, %r10 |
| mov $__NR_wait4, %eax |
| syscall |
| cmp $-4, %eax // EINTR |
| jz 32b |
| mov -4(%r8), %eax // helper's exit status |
| test %rax, %rax |
| jnz 26b // exit process (no error message) |
| |
| // Release privileges by entering seccomp mode. |
| mov $__NR_prctl, %eax |
| mov $22, %edi // PR_SET_SECCOMP |
| mov $1, %esi |
| syscall |
| CHECK_SYSCALL_ZERO |
| |
| // We can finally start using the stack. Signal handlers no longer pose |
| // a threat to us. |
| mov %r8, %rsp |
| |
| // Back in the newly created sandboxed thread, wait for trusted process |
| // to receive request. It is possible for an attacker to make us |
| // continue even before the trusted process is done. This is OK. It'll |
| // result in us putting stale values into the new thread's TLS. But |
| // that data is considered untrusted anyway. |
| push %rax // reserve a stack slot for the one byte buffer |
| mov $1, %edx // len = 1 |
| mov %rsp, %rsi // buf = %rsp |
| mov %r9, %rdi // fd = threadFdPub |
| 33:xor %rax, %rax // NR_read |
| syscall |
| cmp $-4, %rax // EINTR |
| jz 33b |
| cmp %rdx, %rax |
| jne fatal_error |
| pop %rax // release the buffer slot again |
| |
| // Returning to the place where clone() had been called. We rely on |
| // using rt_sigreturn() for restoring our registers. The caller already |
| // created a signal stack frame and patched the register values |
| // with the ones that were in effect prior to calling sandbox_clone(). |
| mov $__NR_rt_sigreturn, %eax |
| syscall |
| |
| .pushsection ".rodata" |
| 100:.ascii "Sandbox violation detected, program aborted\n" |
| 101:.ascii "WARNING! This is an expensive system call\n" |
| 102: |
| .popsection |
| |
| // Reached through rt_sigreturn() on the initial thread (see the return |
| // address stored into the signal frame at the top of this function). |
| 999:pop %rbp |
| pop %rbx |
| ret |
| |
| |
| .bss |
| // Scratch buffer in which the fork()'d helper assembles the msghdr, |
| // payload, iovec and control message that it hands to sendmsg(). |
| // Nothing else needs this memory while the helper runs, so sharing |
| // it with other data would be possible; at 0x6C bytes that is not |
| // worth the effort. It has to live in a MAP_PRIVATE mapping, though: |
| // otherwise an untrusted thread could alter the helper's copy. |
| sendmsg_data: |
| .skip 0x6C |