Merge pull request #336 from hqhq/hq_parent_cgroup_systemd
systemd: support cgroup parent with specified slice
diff --git a/README.md b/README.md
index c15d97c..9996580 100644
--- a/README.md
+++ b/README.md
@@ -5,9 +5,8 @@
## State of the project
Currently `runc` is an implementation of the OCI specification. We are currently sprinting
-to have a v1 of the spec out within a quick timeframe of a few weeks, ~July 2015,
-so the `runc` config format will be constantly changing until
-the spec is finalized. However, we encourage you to try out the tool and give feedback.
+to have a v1 of the spec out. So the `runc` config format will be constantly changing until
+the spec is finalized. However, we encourage you to try out the tool and give feedback.
### OCF
diff --git a/libcontainer/console_freebsd.go b/libcontainer/console_freebsd.go
index 4d20b8d..3c89eda 100644
--- a/libcontainer/console_freebsd.go
+++ b/libcontainer/console_freebsd.go
@@ -6,8 +6,8 @@
"errors"
)
-// newConsole returns an initalized console that can be used within a container by copying bytes
+// NewConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
-func newConsole(uid, gid int) (Console, error) {
+func NewConsole(uid, gid int) (Console, error) {
return nil, errors.New("libcontainer console is not supported on FreeBSD")
}
diff --git a/libcontainer/console_linux.go b/libcontainer/console_linux.go
index f345f57..7af771b 100644
--- a/libcontainer/console_linux.go
+++ b/libcontainer/console_linux.go
@@ -10,9 +10,9 @@
"github.com/opencontainers/runc/libcontainer/label"
)
-// newConsole returns an initalized console that can be used within a container by copying bytes
+// NewConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
-func newConsole(uid, gid int) (Console, error) {
+func NewConsole(uid, gid int) (Console, error) {
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
if err != nil {
return nil, err
diff --git a/libcontainer/console_windows.go b/libcontainer/console_windows.go
index 80c7463..a68c02f 100644
--- a/libcontainer/console_windows.go
+++ b/libcontainer/console_windows.go
@@ -1,7 +1,7 @@
package libcontainer
-// newConsole returns an initalized console that can be used within a container
-func newConsole(uid, gid int) (Console, error) {
+// NewConsole returns an initialized console that can be used within a container
+func NewConsole(uid, gid int) (Console, error) {
return &windowsConsole{}, nil
}
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index 912673a..82476ed 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -3,8 +3,10 @@
package libcontainer
import (
+ "bytes"
"encoding/json"
"fmt"
+ "io"
"io/ioutil"
"os"
"os/exec"
@@ -19,6 +21,7 @@
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc"
+ "github.com/vishvananda/netlink/nl"
)
const stdioFdCount = 3
@@ -218,7 +221,7 @@
return nil, newSystemError(err)
}
if !doInit {
- return c.newSetnsProcess(p, cmd, parentPipe, childPipe), nil
+ return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
}
return c.newInitProcess(p, cmd, parentPipe, childPipe)
}
@@ -273,23 +276,24 @@
}, nil
}
-func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *setnsProcess {
- cmd.Env = append(cmd.Env,
- fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()),
- "_LIBCONTAINER_INITTYPE=setns",
- )
- if p.consolePath != "" {
- cmd.Env = append(cmd.Env, "_LIBCONTAINER_CONSOLE_PATH="+p.consolePath)
+func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
+ cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=setns")
+ // for setns process, we don't have to set cloneflags as the process namespaces
+ // will only be set via setns syscall
+ data, err := c.bootstrapData(0, c.initProcess.pid(), p.consolePath)
+ if err != nil {
+ return nil, err
}
// TODO: set on container for process management
return &setnsProcess{
- cmd: cmd,
- cgroupPaths: c.cgroupManager.GetPaths(),
- childPipe: childPipe,
- parentPipe: parentPipe,
- config: c.newInitConfig(p),
- process: p,
- }
+ cmd: cmd,
+ cgroupPaths: c.cgroupManager.GetPaths(),
+ childPipe: childPipe,
+ parentPipe: parentPipe,
+ config: c.newInitConfig(p),
+ process: p,
+ bootstrapData: data,
+ }, nil
}
func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
@@ -1021,3 +1025,25 @@
}
return state, nil
}
+
+// bootstrapData builds the bootstrap message in netlink binary format and
+// returns it as an io.Reader. The caller can write it to a bootstrap program,
+// such as one built on the nsenter package, so that the container process is
+// set up correctly. The message always carries the pid attribute and, when
+// consolePath is non-empty, the console path attribute. cloneFlags is accepted
+// but is not encoded into the message here.
+func (c *linuxContainer) bootstrapData(cloneFlags uintptr, pid int, consolePath string) (io.Reader, error) {
+	req := nl.NewNetlinkRequest(int(InitMsg), 0)
+
+	// the pid attribute is always present
+	pidAttr := &Int32msg{Type: PidAttr, Value: uint32(pid)}
+	req.AddData(pidAttr)
+
+	// the console path attribute is optional
+	if consolePath != "" {
+		pathAttr := &Bytemsg{Type: ConsolePathAttr, Value: []byte(consolePath)}
+		req.AddData(pathAttr)
+	}
+
+	return bytes.NewReader(req.Serialize()), nil
+}
diff --git a/libcontainer/error.go b/libcontainer/error.go
index 6c26662..aa59d2a 100644
--- a/libcontainer/error.go
+++ b/libcontainer/error.go
@@ -22,6 +22,7 @@
// Common errors
ConfigInvalid
+ ConsoleExists
SystemError
)
@@ -43,6 +44,8 @@
return "Container is not stopped"
case ContainerNotRunning:
return "Container is not running"
+ case ConsoleExists:
+ return "Console exists for process"
default:
return "Unknown error"
}
diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go
new file mode 100644
index 0000000..0e95e3b
--- /dev/null
+++ b/libcontainer/message_linux.go
@@ -0,0 +1,60 @@
+// +build linux
+
+package libcontainer
+
+import (
+ "syscall"
+
+ "github.com/vishvananda/netlink/nl"
+)
+
+// Message and attribute types sent to the bootstrap program (mirrored as #defines in nsexec.c).
+// The numbers are randomly chosen so as not to conflict with known netlink types.
+const (
+ InitMsg uint16 = 62000
+ PidAttr uint16 = 27281
+ ConsolePathAttr uint16 = 27282
+)
+
+// Int32msg is a netlink attribute that carries a single 32-bit value.
+// It has the following representation
+// | nlattr len | nlattr type |
+// | uint32 value |
+type Int32msg struct {
+	Type  uint16
+	Value uint32
+}
+
+// Serialize encodes the attribute header followed by the value, both in
+// native byte order.
+func (msg *Int32msg) Serialize() []byte {
+	size := msg.Len()
+	out := make([]byte, size)
+	order := nl.NativeEndian()
+	order.PutUint16(out[:2], uint16(size))
+	order.PutUint16(out[2:4], msg.Type)
+	order.PutUint32(out[4:], msg.Value)
+	return out
+}
+
+// Len returns the serialized size: the nlattr header plus a 4-byte value.
+func (msg *Int32msg) Len() int {
+	return syscall.NLA_HDRLEN + 4
+}
+
+// Bytemsg is a netlink attribute that carries a byte string.
+// It has the following representation
+// | nlattr len | nlattr type |
+// | value | pad |
+type Bytemsg struct {
+	Type  uint16
+	Value []byte
+}
+
+// Serialize encodes the attribute header and the value into a buffer whose
+// size is rounded up to NLA_ALIGNTO. The length written in the header is the
+// unpadded Len(); the terminating NUL and the padding are the zero bytes left
+// over from allocation.
+func (msg *Bytemsg) Serialize() []byte {
+	size := msg.Len()
+	padded := (size + syscall.NLA_ALIGNTO - 1) &^ (syscall.NLA_ALIGNTO - 1)
+	out := make([]byte, padded)
+	order := nl.NativeEndian()
+	order.PutUint16(out[:2], uint16(size))
+	order.PutUint16(out[2:4], msg.Type)
+	copy(out[4:], msg.Value)
+	return out
+}
+
+// Len returns the unpadded serialized size: the nlattr header, the value and
+// one byte for the terminating NUL.
+func (msg *Bytemsg) Len() int {
+	return syscall.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
+}
diff --git a/libcontainer/nsenter/nsenter_test.go b/libcontainer/nsenter/nsenter_test.go
index db27b8a..976ae6b 100644
--- a/libcontainer/nsenter/nsenter_test.go
+++ b/libcontainer/nsenter/nsenter_test.go
@@ -1,12 +1,17 @@
package nsenter
import (
+ "bytes"
"encoding/json"
- "fmt"
+ "io"
"os"
"os/exec"
"strings"
+ "syscall"
"testing"
+
+ "github.com/opencontainers/runc/libcontainer"
+ "github.com/vishvananda/netlink/nl"
)
type pid struct {
@@ -15,7 +20,7 @@
func TestNsenterAlivePid(t *testing.T) {
args := []string{"nsenter-exec"}
- r, w, err := os.Pipe()
+ parent, child, err := newPipe()
if err != nil {
t.Fatalf("failed to create pipe %v", err)
}
@@ -23,16 +28,22 @@
cmd := &exec.Cmd{
Path: os.Args[0],
Args: args,
- ExtraFiles: []*os.File{w},
- Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", os.Getpid()), "_LIBCONTAINER_INITPIPE=3"},
+ ExtraFiles: []*os.File{child},
+ Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
}
if err := cmd.Start(); err != nil {
t.Fatalf("nsenter failed to start %v", err)
}
- w.Close()
-
- decoder := json.NewDecoder(r)
+ r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
+ r.AddData(&libcontainer.Int32msg{
+ Type: libcontainer.PidAttr,
+ Value: uint32(os.Getpid()),
+ })
+ if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
+ t.Fatal(err)
+ }
+ decoder := json.NewDecoder(parent)
var pid *pid
if err := decoder.Decode(&pid); err != nil {
@@ -51,34 +62,67 @@
func TestNsenterInvalidPid(t *testing.T) {
args := []string{"nsenter-exec"}
-
- cmd := &exec.Cmd{
- Path: os.Args[0],
- Args: args,
- Env: []string{"_LIBCONTAINER_INITPID=-1"},
+ parent, child, err := newPipe()
+ if err != nil {
+ t.Fatalf("failed to create pipe %v", err)
}
- err := cmd.Run()
- if err == nil {
+ cmd := &exec.Cmd{
+ Path: os.Args[0],
+ Args: args,
+ ExtraFiles: []*os.File{child},
+ Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
+ }
+
+ if err := cmd.Start(); err != nil {
+ t.Fatal("nsenter exits with a zero exit status")
+ }
+ r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
+ r.AddData(&libcontainer.Int32msg{
+ Type: libcontainer.PidAttr,
+ Value: 0,
+ })
+ if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
+ t.Fatal(err)
+ }
+
+ if err := cmd.Wait(); err == nil {
t.Fatal("nsenter exits with a zero exit status")
}
}
func TestNsenterDeadPid(t *testing.T) {
- dead_cmd := exec.Command("true")
- if err := dead_cmd.Run(); err != nil {
+ deadCmd := exec.Command("true")
+ if err := deadCmd.Run(); err != nil {
t.Fatal(err)
}
args := []string{"nsenter-exec"}
-
- cmd := &exec.Cmd{
- Path: os.Args[0],
- Args: args,
- Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", dead_cmd.Process.Pid)},
+ parent, child, err := newPipe()
+ if err != nil {
+ t.Fatalf("failed to create pipe %v", err)
}
- err := cmd.Run()
- if err == nil {
+ cmd := &exec.Cmd{
+ Path: os.Args[0],
+ Args: args,
+ ExtraFiles: []*os.File{child},
+ Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
+ }
+
+ // a failure here means the helper never ran, which is different from it exiting with status zero
+ if err := cmd.Start(); err != nil {
+ t.Fatalf("nsenter failed to start %v", err)
+ }
+
+ r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
+ r.AddData(&libcontainer.Int32msg{
+ Type: libcontainer.PidAttr,
+ // pid of the "true" command that has already run to completion above
+ Value: uint32(deadCmd.Process.Pid),
+ })
+ if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
+ t.Fatal(err)
+ }
+
+ if err := cmd.Wait(); err == nil {
t.Fatal("nsenter exits with a zero exit status")
}
}
@@ -89,3 +133,11 @@
}
return
}
+
+func newPipe() (parent *os.File, child *os.File, err error) {
+ fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
+ if err != nil {
+ return nil, nil, err
+ }
+ return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
+}
diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c
index 01450a9..27e6e53 100644
--- a/libcontainer/nsenter/nsexec.c
+++ b/libcontainer/nsenter/nsexec.c
@@ -17,6 +17,11 @@
#include <sched.h>
#include <signal.h>
+#include <linux/netlink.h>
+#include <linux/types.h>
+#include <stdint.h>
+#include <sys/socket.h>
+
/* All arguments should be above stack, because it grows down */
struct clone_arg {
/*
@@ -63,24 +68,33 @@
return child;
}
+/*
+ * readint32 returns the 32-bit value stored at buf, in native byte order.
+ * The bytes are copied out instead of being read through a cast pointer,
+ * because buf points into a char buffer that has no alignment guarantee.
+ */
+static uint32_t readint32(char *buf)
+{
+	uint32_t value;
+
+	memcpy(&value, buf, sizeof(value));
+	return value;
+}
+
+// list of known message types we want to send to bootstrap program
+// These are defined in libcontainer/message_linux.go
+#define INIT_MSG 62000
+#define PID_ATTR 27281
+#define CONSOLE_PATH_ATTR 27282
+
void nsexec()
{
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" };
const int num = sizeof(namespaces) / sizeof(char *);
jmp_buf env;
char buf[PATH_MAX], *val;
- int i, tfd, self_tfd, child, len, pipenum, consolefd = -1;
- pid_t pid;
- char *console;
+ int i, tfd, self_tfd, child, n, len, pipenum, consolefd = -1;
+ pid_t pid = 0;
- val = getenv("_LIBCONTAINER_INITPID");
- if (val == NULL)
+ // if we don't have INITTYPE or this is the init process, skip the bootstrap process
+ val = getenv("_LIBCONTAINER_INITTYPE");
+ if (val == NULL || strcmp(val, "standard") == 0) {
return;
-
- pid = atoi(val);
- snprintf(buf, sizeof(buf), "%d", pid);
- if (strcmp(val, buf)) {
- pr_perror("Unable to parse _LIBCONTAINER_INITPID");
+ }
+ if (strcmp(val, "setns") != 0) {
+ pr_perror("Invalid inittype %s", val);
exit(1);
}
@@ -89,7 +103,6 @@
pr_perror("Child pipe not found");
exit(1);
}
-
pipenum = atoi(val);
snprintf(buf, sizeof(buf), "%d", pipenum);
if (strcmp(val, buf)) {
@@ -97,13 +110,56 @@
exit(1);
}
- console = getenv("_LIBCONTAINER_CONSOLE_PATH");
- if (console != NULL) {
- consolefd = open(console, O_RDWR);
- if (consolefd < 0) {
- pr_perror("Failed to open console %s", console);
- exit(1);
+ char nlbuf[NLMSG_HDRLEN];
+ struct nlmsghdr *nh;
+ if ((n = read(pipenum, nlbuf, NLMSG_HDRLEN)) != NLMSG_HDRLEN) {
+ pr_perror("Failed to read netlink header, got %d", n);
+ exit(1);
+ }
+
+ nh = (struct nlmsghdr *)nlbuf;
+ if (nh->nlmsg_type == NLMSG_ERROR) {
+ pr_perror("Invalid netlink header message");
+ exit(1);
+ }
+ if (nh->nlmsg_type != INIT_MSG) {
+ pr_perror("Unexpected netlink message type %d", nh->nlmsg_type);
+ exit(1);
+ }
+ // read the netlink payload
+ len = NLMSG_PAYLOAD(nh, 0);
+ char data[len];
+ if ((n = read(pipenum, data, len)) != len) {
+ pr_perror("Failed to read netlink payload, got %d", n);
+ exit(1);
+ }
+
+ int start = 0;
+ struct nlattr *attr;
+ while (start < len) {
+ int payload_len;
+ attr = (struct nlattr *)((void *)data + start);
+ start += NLA_HDRLEN;
+ payload_len = attr->nla_len - NLA_HDRLEN;
+ switch (attr->nla_type) {
+ case PID_ATTR:
+ pid = (pid_t) readint32(data + start);
+ break;
+ case CONSOLE_PATH_ATTR:
+ consolefd = open((char *)data + start, O_RDWR);
+ if (consolefd < 0) {
+ pr_perror("Failed to open console %s", (char *)data + start);
+ exit(1);
+ }
+ break;
}
+ start += NLA_ALIGN(payload_len);
+ }
+
+ // required pid to be passed
+ if (pid == 0) {
+ pr_perror("missing pid");
+ exit(1);
}
/* Check that the specified process exists */
@@ -133,15 +189,13 @@
}
/* Skip namespaces we're already part of */
- if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 &&
- st.st_ino == self_st.st_ino) {
+ if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 && st.st_ino == self_st.st_ino) {
continue;
}
fd = openat(tfd, namespaces[i], O_RDONLY);
if (fd == -1) {
- pr_perror("Failed to open ns file %s for ns %s", buf,
- namespaces[i]);
+ pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]);
exit(1);
}
// Set the namespace.
diff --git a/libcontainer/process.go b/libcontainer/process.go
index 7902d08..e96dc0d 100644
--- a/libcontainer/process.go
+++ b/libcontainer/process.go
@@ -80,10 +80,19 @@
// NewConsole creates new console for process and returns it
func (p *Process) NewConsole(rootuid int) (Console, error) {
- console, err := newConsole(rootuid, rootuid)
+ console, err := NewConsole(rootuid, rootuid)
if err != nil {
return nil, err
}
p.consolePath = console.Path()
return console, nil
}
+
+// ConsoleFromPath records path as the console for the process. A console path
+// can be set only once: if one is already present it is left untouched and a
+// ConsoleExists error is returned.
+func (p *Process) ConsoleFromPath(path string) error {
+	if p.consolePath == "" {
+		p.consolePath = path
+		return nil
+	}
+	return newGenericError(fmt.Errorf("console path already exists for process"), ConsoleExists)
+}
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
index 4d17cbc..f27b6cf 100644
--- a/libcontainer/process_linux.go
+++ b/libcontainer/process_linux.go
@@ -41,13 +41,14 @@
}
type setnsProcess struct {
- cmd *exec.Cmd
- parentPipe *os.File
- childPipe *os.File
- cgroupPaths map[string]string
- config *initConfig
- fds []string
- process *Process
+ cmd *exec.Cmd
+ parentPipe *os.File
+ childPipe *os.File
+ cgroupPaths map[string]string
+ config *initConfig
+ fds []string
+ process *Process
+ bootstrapData io.Reader
}
func (p *setnsProcess) startTime() (string, error) {
@@ -64,6 +65,16 @@
func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
+ err = p.cmd.Start()
+ p.childPipe.Close()
+ if err != nil {
+ return newSystemError(err)
+ }
+ if p.bootstrapData != nil {
+ if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
+ return newSystemError(err)
+ }
+ }
if err = p.execSetns(); err != nil {
return newSystemError(err)
}
@@ -96,11 +107,6 @@
// before the go runtime boots, we wait on the process to die and receive the child's pid
// over the provided pipe.
func (p *setnsProcess) execSetns() error {
- err := p.cmd.Start()
- p.childPipe.Close()
- if err != nil {
- return newSystemError(err)
- }
status, err := p.cmd.Process.Wait()
if err != nil {
p.cmd.Wait()