| // Copyright 2015 The Chromium OS Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
// Package seccomp implements support for compiling and installing Seccomp-BPF policy files.
//   - http://www.chromium.org/chromium-os/developer-guide/chromium-os-sandboxing
//
// Typical usage:
//
//	// Check for the required kernel support for seccomp.
//	if err := seccomp.CheckSupport(); err != nil {
//		log.Fatal(err)
//	}
//
//	// Compile BPF program from a Chromium-OS policy file.
//	bpf, err := seccomp.Compile(path)
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	// Install Seccomp-BPF filter program with the kernel.
//	if err := seccomp.Install(bpf); err != nil {
//		log.Fatal(err)
//	}
//
// For background and more information:
//   - http://www.tcpdump.org/papers/bpf-usenix93.pdf
//   - http://en.wikipedia.org/wiki/Seccomp
//   - http://lwn.net/Articles/475043/
//   - http://outflux.net/teach-seccomp/
//   - http://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt
//   - http://github.com/torvalds/linux/blob/master/kernel/seccomp.c
//
// TODO:
//   - Exit the program if any thread is killed because of seccomp violation.
//   - Provide a debug mode to log system calls used during normal operation.
| package seccomp |
| |
| import ( |
| "bytes" |
| "fmt" |
| "io/ioutil" |
| "regexp" |
| "runtime" |
| "strconv" |
| "strings" |
| "syscall" |
| "unsafe" |
| ) |
| |
| // #include <sys/prctl.h> |
| // #include <asm/unistd.h> |
| // #include <linux/seccomp.h> |
| import "C" |
| |
// SeccompData describes the data a seccomp BPF program is run against.
// Its field order and types mirror struct seccomp_data from
// <linux/seccomp.h>, so the fields must not be reordered or resized.
type SeccompData struct {
	// NR is the system call number.
	NR int32

	// Arch is the system call convention, as an AUDIT_ARCH_* value.
	Arch uint32

	// InstructionPointer is the instruction pointer at the time of the
	// system call.
	InstructionPointer uint64

	// Args holds the system call arguments; each one is always stored as
	// a 64-bit value.
	Args [6]uint64
}
| |
// seccomp_data is the cgo view of the C struct seccomp_data. It exists so
// that the hand-written SeccompData layout can be sanity-checked against
// the C definition.
type seccomp_data C.struct_seccomp_data
| |
| // bpfLoadNR returns the instruction to load the NR field in SeccompData. |
| func bpfLoadNR() SockFilter { |
| return bpfLoad(unsafe.Offsetof(SeccompData{}.NR)) |
| } |
| |
| // bpfLoadArch returns the instruction to load the Arch field in SeccompData. |
| func bpfLoadArch() SockFilter { |
| return bpfLoad(unsafe.Offsetof(SeccompData{}.Arch)) |
| } |
| |
| // bpfLoadArg returns the instruction to load one word of an argument in SeccompData. |
| func bpfLoadArg(arg, word int) SockFilter { |
| return bpfLoad(unsafe.Offsetof(SeccompData{}.Args) + uintptr(((2*arg)+word)*4)) |
| } |
| |
| // retKill returns the code for seccomp kill action. |
| func retKill() uint32 { |
| return C.SECCOMP_RET_KILL |
| } |
| |
| // retTrap returns the code for seccomp trap action. |
| func retTrap() uint32 { |
| return C.SECCOMP_RET_TRAP |
| } |
| |
| // retErrno returns the code for seccomp errno action with the specified errno embedded. |
| func retErrno(errno syscall.Errno) uint32 { |
| return C.SECCOMP_RET_ERRNO | (uint32(errno) & C.SECCOMP_RET_DATA) |
| } |
| |
| // retAllow returns the code for seccomp allow action. |
| func retAllow() uint32 { |
| return C.SECCOMP_RET_ALLOW |
| } |
| |
| // policy represents the seccomp policy for a single syscall. |
| type policy struct { |
| // name of the syscall. |
| name string |
| |
| // expr is evaluated on the syscall arguments. |
| // nil expr evaluates to false. |
| expr orExpr |
| |
| // then is executed if the expr evaluates to true. |
| // (cannot be specified in policy file, used in tests only). |
| then SockFilter |
| |
| // default action (else) if the expr evaluates to false. |
| // nil means jump to end of program for the overall default. |
| def *SockFilter |
| } |
| |
| // orExpr is a list of and expressions. |
| type orExpr []andExpr |
| |
| // andExpr is a list of arg comparisons. |
| type andExpr []argComp |
| |
| // argComp represents a basic argument comparison in the policy. |
| type argComp struct { |
| idx int // 0..5 for indexing into SeccompData.Args. |
| oper string // comparison operator: "==", "!=", or "&". |
| val uint64 // upper 32 bits compared only if nbits>32. |
| } |
| |
| // String converts the internal policy representation back to policy file syntax. |
| func (p policy) String() string { |
| var buf bytes.Buffer |
| fmt.Fprintf(&buf, "%s: ", p.name) |
| |
| for i, and := range p.expr { |
| if i > 0 { |
| fmt.Fprintf(&buf, " || ") |
| } |
| for j, arg := range and { |
| if j > 0 { |
| fmt.Fprintf(&buf, " && ") |
| } |
| fmt.Fprintf(&buf, "arg%d %s %#x", arg.idx, arg.oper, arg.val) |
| } |
| } |
| |
| pret := func(f SockFilter) { |
| if f.Code == opRET { |
| switch f.K & C.SECCOMP_RET_ACTION { |
| case C.SECCOMP_RET_ALLOW: |
| fmt.Fprintf(&buf, "1") |
| return |
| case C.SECCOMP_RET_ERRNO: |
| fmt.Fprintf(&buf, "return %d", f.K&C.SECCOMP_RET_DATA) |
| return |
| } |
| } |
| fmt.Fprintf(&buf, "%s", f) |
| } |
| if p.then != bpfRet(retAllow()) { |
| fmt.Fprintf(&buf, " ? ") |
| pret(p.then) |
| } |
| if p.def != nil { |
| if p.expr != nil { |
| fmt.Fprintf(&buf, "; ") |
| } |
| pret(*p.def) |
| } |
| |
| return buf.String() |
| } |
| |
// Syntax of a policy line for a single syscall. In every pattern the first
// capture group is the syscall name.

// allowRE matches an unconditional allow: "name: 1".
var allowRE = regexp.MustCompile(`^([[:word:]]+) *: *1$`)

// returnRE matches an unconditional errno return: "name: return errno".
// The second group is the errno text.
var returnRE = regexp.MustCompile(`^([[:word:]]+) *: *return *([[:word:]]+)$`)

// exprRE matches a bare argument expression: "name: expr". The second group
// is everything after the colon, which may not contain ";".
var exprRE = regexp.MustCompile(`^([[:word:]]+) *:([^;]+)$`)

// exprReturnRE matches an argument expression followed by an errno return:
// "name: expr; return errno". Groups two and three are the expression and
// the errno text.
var exprReturnRE = regexp.MustCompile(`^([[:word:]]+) *:([^;]+); *return *([[:word:]]+)$`)

// argRE matches one comparison inside an expression: "argN op value" with N
// in 0..5 and op one of "==", "!=", "&". The groups are N, op and value.
var argRE = regexp.MustCompile(`^arg([0-5]) *(==|!=|&) *([[:word:]]+)$`)
| |
| // parseLine parses the policy line for a single syscall. |
| func parseLine(line string) (policy, error) { |
| var name, expr, ret string |
| var then SockFilter |
| var def *SockFilter |
| |
| line = strings.TrimSpace(line) |
| if match := allowRE.FindStringSubmatch(line); match != nil { |
| name = match[1] |
| def = ptr(bpfRet(retAllow())) |
| } else if match = returnRE.FindStringSubmatch(line); match != nil { |
| name = match[1] |
| ret = match[2] |
| } else if match = exprRE.FindStringSubmatch(line); match != nil { |
| name = match[1] |
| expr = match[2] |
| } else if match = exprReturnRE.FindStringSubmatch(line); match != nil { |
| name = match[1] |
| expr = match[2] |
| ret = match[3] |
| } else { |
| return policy{}, fmt.Errorf("invalid syntax") |
| } |
| |
| if _, ok := syscallNum[name]; !ok { |
| return policy{}, fmt.Errorf("unknown syscall: %s", name) |
| } |
| |
| var or orExpr |
| if expr != "" { |
| for _, sub := range strings.Split(expr, "||") { |
| var and andExpr |
| for _, arg := range strings.Split(sub, "&&") { |
| arg = strings.TrimSpace(arg) |
| match := argRE.FindStringSubmatch(arg) |
| if match == nil { |
| return policy{}, fmt.Errorf("invalid expression: %s", arg) |
| } |
| idx, err := strconv.Atoi(match[1]) |
| if err != nil { |
| return policy{}, fmt.Errorf("invalid arg: %s", arg) |
| } |
| oper := match[2] |
| val, err := strconv.ParseUint(match[3], 0, 64) |
| if err != nil { |
| return policy{}, fmt.Errorf("invalid value: %s", arg) |
| } |
| and = append(and, argComp{idx, oper, val}) |
| } |
| or = append(or, and) |
| } |
| } |
| |
| then = bpfRet(retAllow()) |
| |
| if ret != "" { |
| errno, err := strconv.ParseUint(ret, 0, 16) |
| if err != nil { |
| return policy{}, fmt.Errorf("invalid errno: %s", ret) |
| } |
| def = ptr(bpfRet(retErrno(syscall.Errno(errno)))) |
| } |
| |
| return policy{name, or, then, def}, nil |
| } |
| |
| // parseLines parses multiple policy lines, each one for a single syscall. |
| // Empty lines and lines beginning with "#" are ignored. |
| // Multiple policies for a syscall are detected and reported as error. |
| func parseLines(lines []string) ([]policy, error) { |
| var ps []policy |
| seen := make(map[string]int) |
| for i, line := range lines { |
| lineno := i + 1 |
| if line == "" || strings.HasPrefix(line, "#") { |
| continue |
| } |
| p, err := parseLine(line) |
| if err != nil { |
| return nil, fmt.Errorf("line %d: %v", lineno, err) |
| } |
| if seen[p.name] > 0 { |
| return nil, fmt.Errorf("lines %d,%d: multiple policies for %s", |
| seen[p.name], lineno, p.name) |
| } |
| seen[p.name] = lineno |
| ps = append(ps, p) |
| } |
| return ps, nil |
| } |
| |
| // parseFile reads a Chromium-OS Seccomp-BPF policy file and parses its contents. |
| func parseFile(path string) ([]policy, error) { |
| file, err := ioutil.ReadFile(path) |
| if err != nil { |
| return nil, err |
| } |
| return parseLines(strings.Split(string(file), "\n")) |
| } |
| |
| // compile compiles a Seccomp-BPF program implementing the syscall policies. |
| // long specifies whether to generate 32-bit or 64-bit argument comparisons. |
| // def is the overall default action to take when the syscall does not match |
| // any policy in the filter. |
| func compile(ps []policy, long bool, def SockFilter) ([]SockFilter, error) { |
| var bpf []SockFilter |
| do := func(insn SockFilter) { |
| bpf = append(bpf, insn) |
| } |
| |
| // ref maps a label to addresses of all the instructions that jump to it. |
| ref := make(map[string][]int) |
| jump := func(name string) { |
| // jump to a label with unresolved address: insert a placeholder instruction. |
| ref[name] = append(ref[name], len(bpf)) |
| do(SockFilter{}) |
| } |
| label := func(name string) { |
| // label address resolved: replace placeholder instructions with actual jumps. |
| for _, i := range ref[name] { |
| bpf[i] = bpfJump(len(bpf) - (i + 1)) |
| } |
| delete(ref, name) |
| } |
| |
| // Conditional jumps: jump if condition is true, fall through otherwise. |
| jeq := func(val uint32, target string) { |
| // if A == val { goto target } |
| do(bpfJeq(val, 0, 1)) |
| jump(target) |
| } |
| jne := func(val uint32, target string) { |
| // if A != val { goto target } |
| do(bpfJeq(val, 1, 0)) |
| jump(target) |
| } |
| jset := func(val uint32, target string) { |
| // if A&val != 0 { goto target } |
| do(bpfJset(val, 0, 1)) |
| jump(target) |
| } |
| jnset := func(val uint32, target string) { |
| // if A&val == 0 { goto target } |
| do(bpfJset(val, 1, 0)) |
| jump(target) |
| } |
| |
| do(bpfLoadArch()) |
| do(bpfJeq(auditArch, 1, 0)) |
| do(bpfRet(retKill())) |
| |
| do(bpfLoadNR()) |
| for _, p := range ps { |
| nr, ok := syscallNum[p.name] |
| if !ok { |
| return nil, fmt.Errorf("unknown syscall: %s", p.name) |
| } |
| jne(uint32(nr), "nextcall") |
| |
| for _, and := range p.expr { |
| for _, arg := range and { |
| val := struct{ high, low uint32 }{uint32(arg.val >> 32), uint32(arg.val)} |
| switch arg.oper { |
| case "==": |
| if long { |
| do(bpfLoadArg(arg.idx, 1)) |
| jne(val.high, "nextor") |
| } |
| do(bpfLoadArg(arg.idx, 0)) |
| jne(val.low, "nextor") |
| case "!=": |
| if long { |
| do(bpfLoadArg(arg.idx, 1)) |
| jne(val.high, "nextand") |
| } |
| do(bpfLoadArg(arg.idx, 0)) |
| jeq(val.low, "nextor") |
| case "&": |
| if long { |
| do(bpfLoadArg(arg.idx, 1)) |
| jset(val.high, "nextand") |
| } |
| do(bpfLoadArg(arg.idx, 0)) |
| jnset(val.low, "nextor") |
| default: |
| return nil, fmt.Errorf("unknown operator: %q", arg.oper) |
| } |
| |
| // Comparison was satisfied. Move on to the next comparison in &&. |
| label("nextand") |
| } |
| |
| // All comparisons in && were satisfied. |
| do(p.then) |
| |
| // Some comparison in && was false. Move on to the next expression in ||. |
| label("nextor") |
| } |
| |
| // All expressions in || evaluated to false (or expr was nil). |
| if p.def != nil { |
| do(*p.def) |
| } else { |
| jump("default") |
| } |
| |
| label("nextcall") |
| } |
| |
| label("default") |
| do(def) |
| |
| if len(ref) > 0 { |
| return nil, fmt.Errorf("unresolved labels: %v\n%v", ref, bpf) |
| } |
| return bpf, nil |
| } |
| |
| // Compile reads a Chromium-OS policy file and compiles a |
| // Seccomp-BPF filter program implementing the policies. |
| func Compile(path string) ([]SockFilter, error) { |
| ps, err := parseFile(path) |
| if err != nil { |
| return nil, err |
| } |
| return compile(ps, nbits > 32, bpfRet(retKill())) |
| } |
| |
| // prctl is a wrapper for the 'prctl' system call. |
| // See 'man prctl' for details. |
| func prctl(option uintptr, args ...uintptr) error { |
| if len(args) > 4 { |
| return syscall.E2BIG |
| } |
| var arg [4]uintptr |
| copy(arg[:], args) |
| _, _, e := syscall.Syscall6(C.__NR_prctl, option, arg[0], arg[1], arg[2], arg[3], 0) |
| if e != 0 { |
| return e |
| } |
| return nil |
| } |
| |
| // seccomp is a wrapper for the 'seccomp' system call. |
| // See <linux/seccomp.h> for valid op and flag values. |
| // uargs is typically a pointer to struct sock_fprog. |
| func seccomp(op, flags uintptr, uargs unsafe.Pointer) error { |
| _, _, e := syscall.Syscall(C.__NR_seccomp, op, flags, uintptr(uargs)) |
| if e != 0 { |
| return e |
| } |
| return nil |
| } |
| |
| // CheckSupport checks for the required seccomp support in the kernel. |
| func CheckSupport() error { |
| // This is based on http://outflux.net/teach-seccomp/autodetect.html. |
| if err := prctl(C.PR_GET_SECCOMP); err != nil { |
| return fmt.Errorf("seccomp not available: %v", err) |
| } |
| if err := prctl(C.PR_SET_SECCOMP, C.SECCOMP_MODE_FILTER, 0); err != syscall.EFAULT { |
| return fmt.Errorf("seccomp filter not available: %v", err) |
| } |
| if err := seccomp(C.SECCOMP_SET_MODE_FILTER, 0, nil); err != syscall.EFAULT { |
| return fmt.Errorf("seccomp syscall not available: %v", err) |
| } |
| if err := seccomp(C.SECCOMP_SET_MODE_FILTER, C.SECCOMP_FILTER_FLAG_TSYNC, nil); err != syscall.EFAULT { |
| return fmt.Errorf("seccomp tsync not available: %v", err) |
| } |
| return nil |
| } |
| |
| // Load makes the seccomp system call to install the bpf filter for |
| // all threads (with tsync). prctl(set_no_new_privs, 1) must have |
| // been called (from the same thread) before calling Load for the |
| // first time. |
| // Most users of this library should use Install instead of calling |
| // Load directly. There are a couple of situations where it may be |
| // necessary to use Load instead of Install: |
| // - If a previous call to Install has disabled the 'prctl' system |
| // call, Install cannot be called again. In that case, it is safe |
| // to add additional filters directly with Load. |
| // - If the process is running as a priviledged user, and you want |
| // to load the seccomp filter without setting no_new_privs. |
| func Load(bpf []SockFilter) error { |
| if size, limit := len(bpf), 0xffff; size > limit { |
| return fmt.Errorf("filter program too big: %d bpf instructions (limit = %d)", size, limit) |
| } |
| prog := &SockFprog{ |
| Filter: &bpf[0], |
| Len: uint16(len(bpf)), |
| } |
| return seccomp(C.SECCOMP_SET_MODE_FILTER, C.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(prog)) |
| } |
| |
| // Install makes the necessary system calls to install the Seccomp-BPF |
| // filter for the current process (all threads). Install can be called |
| // multiple times to install additional filters. |
| func Install(bpf []SockFilter) error { |
| // prctl(set_no_new_privs, 1) must be called (from the same thread) |
| // before a seccomp filter can be installed by an unprivileged user: |
| // - http://www.kernel.org/doc/Documentation/prctl/no_new_privs.txt. |
| runtime.LockOSThread() |
| defer runtime.UnlockOSThread() |
| if err := prctl(C.PR_SET_NO_NEW_PRIVS, 1); err != nil { |
| return err |
| } |
| return Load(bpf) |
| } |