Implement virtio-vsock
Implement virtio-vsock using the vhost infrastructure provided by the kernel.
BUG=chromium:708267
TEST=Run nc-vsock inside and outside the VM and verify that data is transferred
Change-Id: Ifc2e4116e440ecfe2ea39718ead606a73dc3357f
Signed-off-by: Chirantan Ekbote <chirantan@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/495829
Reviewed-by: Dylan Reid <dgreid@chromium.org>
diff --git a/Makefile b/Makefile
index d07f2bb..bbf5d2d 100644
--- a/Makefile
+++ b/Makefile
@@ -67,6 +67,7 @@
OBJS += virtio/rng.o
OBJS += virtio/balloon.o
OBJS += virtio/pci.o
+OBJS += virtio/vsock.o
OBJS += disk/blk.o
OBJS += disk/qcow.o
OBJS += disk/raw.o
diff --git a/builtin-run.c b/builtin-run.c
index 4eff4ac..71e3f2e 100644
--- a/builtin-run.c
+++ b/builtin-run.c
@@ -12,6 +12,7 @@
#include "kvm/virtio-blk.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio-rng.h"
+#include "kvm/virtio-vsock.h"
#include "kvm/ioeventfd.h"
#include "kvm/virtio-9p.h"
#include "kvm/barrier.h"
@@ -119,6 +120,8 @@
" guest", virtio_9p_rootdir_parser, kvm), \
OPT_STRING('\0', "console", &(cfg)->console, "serial, virtio or"\
" hv", "Console to use"), \
+	OPT_U64('\0', "vsock", &(cfg)->guest_cid, "Use vsockets" \
+			" <cid for guest>"), \
OPT_STRING('\0', "dev", &(cfg)->dev, "device_file", \
"KVM device file"), \
OPT_CALLBACK('\0', "tty", NULL, "tty id", \
diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
index 386fa8c..ac2d233 100644
--- a/include/kvm/kvm-config.h
+++ b/include/kvm/kvm-config.h
@@ -27,6 +27,7 @@
int active_console;
int debug_iodelay;
int nrcpus;
+ u64 guest_cid;
const char *kernel_cmdline;
const char *kernel_filename;
const char *vmlinux_filename;
diff --git a/include/kvm/virtio-pci-dev.h b/include/kvm/virtio-pci-dev.h
index 48ae018..fae8644 100644
--- a/include/kvm/virtio-pci-dev.h
+++ b/include/kvm/virtio-pci-dev.h
@@ -15,6 +15,7 @@
#define PCI_DEVICE_ID_VIRTIO_BLN 0x1005
#define PCI_DEVICE_ID_VIRTIO_SCSI 0x1008
#define PCI_DEVICE_ID_VIRTIO_9P 0x1009
+#define PCI_DEVICE_ID_VIRTIO_VSOCK 0x1013
#define PCI_DEVICE_ID_VESA 0x2000
#define PCI_DEVICE_ID_PCI_SHMEM 0x0001
@@ -34,5 +35,6 @@
#define PCI_CLASS_RNG 0xff0000
#define PCI_CLASS_BLN 0xff0000
#define PCI_CLASS_9P 0xff0000
+#define PCI_CLASS_VSOCK 0xff0000
#endif /* VIRTIO_PCI_DEV_H_ */
diff --git a/include/kvm/virtio-vsock.h b/include/kvm/virtio-vsock.h
new file mode 100644
index 0000000..c448e67
--- /dev/null
+++ b/include/kvm/virtio-vsock.h
@@ -0,0 +1,9 @@
+#ifndef KVM__VIRTIO_VSOCK_H
+#define KVM__VIRTIO_VSOCK_H
+
+struct kvm;
+
+/* Register the vhost-backed virtio-vsock device.  A no-op when no guest
+ * CID was configured.  Returns 0 on success, negative errno on failure. */
+int virtio_vsock_init(struct kvm *kvm);
+/* Tear down the state created by virtio_vsock_init().  Returns 0. */
+int virtio_vsock_exit(struct kvm *kvm);
+
+#endif // KVM__VIRTIO_VSOCK_H
diff --git a/include/linux/vhost.h b/include/linux/vhost.h
index bb6a5b4..9e160c3 100644
--- a/include/linux/vhost.h
+++ b/include/linux/vhost.h
@@ -155,4 +155,9 @@
#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
+/* VHOST_VSOCK specific defines */
+
+#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64)
+#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int)
+
#endif
diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h
index 5f60aa4..1570134 100644
--- a/include/linux/virtio_ids.h
+++ b/include/linux/virtio_ids.h
@@ -40,5 +40,6 @@
#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
#define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_INPUT 18 /* virtio input */
+#define VIRTIO_ID_VSOCK 19 /* virtual sockets */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/virtio/vsock.c b/virtio/vsock.c
new file mode 100644
index 0000000..5fe1b71
--- /dev/null
+++ b/virtio/vsock.c
@@ -0,0 +1,317 @@
+#include "kvm/virtio-vsock.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/kvm.h>
+#include <linux/vhost.h>
+#include <linux/virtio_config.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include "kvm/guest_compat.h"
+#include "kvm/kvm.h"
+#include "kvm/virtio.h"
+#include "kvm/virtio-pci-dev.h"
+#include "kvm/util.h"
+
+// All vsock virtqueues use the same fixed ring size.
+#define VIRTIO_VSOCK_QUEUE_SIZE 256
+// Queues 0 and 1 are wired into vhost; queue 2 is the event queue
+// (not implemented yet -- see the TODOs below).
+#define VIRTIO_VSOCK_NUM_QUEUES 3
+
+// Handle for the "missing guest kernel config" compat message; -1 until
+// the first virtqueue is initialized.
+static int compat_id = -1;
+// Singleton device instance; only one vsock device per VM is supported.
+static struct vsock_dev *g_vdev = NULL;
+
+// Guest-visible config space: just the context ID assigned to this guest.
+struct virtio_vsock_config {
+	u64 guest_cid;
+};
+
+// Per-device state tying the virtio transport to the /dev/vhost-vsock fd.
+struct vsock_dev {
+	struct virt_queue vqs[VIRTIO_VSOCK_NUM_QUEUES];
+	struct virtio_vsock_config config;
+
+	u64 features;   // feature bits obtained from vhost at init
+	int vhost_fd;   // open fd on /dev/vhost-vsock
+	u8 status;      // last virtio status byte written by the guest
+
+	struct virtio_device dev;
+	struct kvm *kvm;
+};
+
+// Expose the device config space (just the 64-bit guest CID) to the
+// virtio transport.
+static u8 *get_config(struct kvm *kvm, void *dev) {
+	struct vsock_dev *vsock = dev;
+	return (u8 *)&vsock->config;
+}
+
+// Report the feature bits we offer to the guest (queried from vhost at
+// device creation).
+static u32 get_host_features(struct kvm *kvm, void *dev) {
+	struct vsock_dev *vsock = dev;
+
+	return vsock->features;
+}
+
+// Push the feature bits acked by the guest down into the vhost driver.
+// On ioctl failure the stored feature set is left untouched.
+static void set_guest_features(struct kvm *kvm, void *dev, u32 features) {
+	struct vsock_dev *vsock = dev;
+	u64 wide_features = features;   // VHOST_SET_FEATURES takes a u64
+
+	if (ioctl(vsock->vhost_fd, VHOST_SET_FEATURES, &wide_features) != 0) {
+		pr_err("Unable to set vhost features for virtio-vsock");
+		return;
+	}
+
+	vsock->features = features;
+}
+
+// Transport callback for guest writes to the virtio status register.
+// vhost only services the queues while RUNNING is set, so mirror
+// DRIVER_OK edge transitions into the kernel driver.
+// NOTE(review): relies on vdev->status starting at 0; that only holds if
+// the device struct is zero-initialized at allocation — confirm.
+static void notify_status(struct kvm *kvm, void *dev, u8 status) {
+	struct vsock_dev *vdev = dev;
+	if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) == 0 &&
+	    (status & VIRTIO_CONFIG_S_DRIVER_OK) != 0) {
+		// The driver was just enabled.
+		int on = 1;
+		if (ioctl(vdev->vhost_fd, VHOST_VSOCK_SET_RUNNING, &on) != 0)
+			die_perror("VHOST_VSOCK_SET_RUNNING failed");
+	}
+
+	if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) != 0 &&
+	    (status & VIRTIO_CONFIG_S_DRIVER_OK) == 0) {
+		// The driver was just disabled.
+		int off = 0;
+		if (ioctl(vdev->vhost_fd, VHOST_VSOCK_SET_RUNNING, &off) != 0)
+			die_perror("VHOST_VSOCK_SET_RUNNING failed");
+	}
+
+	vdev->status = status;
+}
+
+// Transport callback: the guest supplied the page frame number for
+// virtqueue 'vq'.  Record the ring locally, then program its size, base
+// index, and addresses into vhost so the kernel can service it directly.
+// Returns 0 on success, -errno on failure.
+static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
+		   u32 pfn) {
+	compat__remove_message(compat_id);
+
+	int ret = 0;
+	void *p = NULL;
+	struct vsock_dev *vdev = dev;
+	struct virt_queue *queue = &vdev->vqs[vq];
+
+	queue->pfn = pfn;
+	p = virtio_get_vq(kvm, queue->pfn, page_size);
+
+	vring_init(&queue->vring, VIRTIO_VSOCK_QUEUE_SIZE, p, align);
+
+	// Only queues 0 and 1 are handed to vhost; the ring above is still
+	// recorded so get_pfn_vq() works for queue 2.
+	if (vq > 1) {
+		// TODO(chirantan): Implement the event virtqueue
+		return 0;
+	}
+
+	struct vhost_vring_state state = {
+		.index = vq,
+		.num = queue->vring.num,
+	};
+	if (ioctl(vdev->vhost_fd, VHOST_SET_VRING_NUM, &state) != 0) {
+		ret = -errno;
+		pr_err("VHOST_SET_VRING_NUM failed for vsock device: %d", ret);
+		return ret;
+	}
+
+	// Start the queue from index 0.
+	state.num = 0;
+	if (ioctl(vdev->vhost_fd, VHOST_SET_VRING_BASE, &state) != 0) {
+		ret = -errno;
+		pr_err("VHOST_SET_VRING_BASE failed for vsock device: %d", ret);
+		return ret;
+	}
+
+	// Ring addresses as seen from this process; vhost validates them
+	// against the memory table installed at init time.  NOTE(review):
+	// assumes virtio_get_vq() returned a host-virtual pointer — confirm.
+	struct vhost_vring_addr addr = {
+		.index = vq,
+		.desc_user_addr = (u64)queue->vring.desc,
+		.avail_user_addr = (u64)queue->vring.avail,
+		.used_user_addr = (u64)queue->vring.used,
+	};
+	if (ioctl(vdev->vhost_fd, VHOST_SET_VRING_ADDR, &addr) != 0) {
+		ret = -errno;
+		pr_err("VHOST_SET_VRING_ADDR failed for vsock device: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+// Transport callback: a GSI was assigned for virtqueue 'vq' interrupts.
+// Create an eventfd, bind it to the guest GSI with KVM_IRQFD, and hand it
+// to vhost as the "call" fd so the kernel injects interrupts directly,
+// bypassing userspace.
+static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) {
+	if (vq > 1) {
+		// TODO(chirantan): Implement the event virtqueue
+		return;
+	}
+	int fd = eventfd(0, 0);
+	if (fd < 0) {
+		// No graceful way to exit here.
+		die_perror("Unable to create eventfd");
+	}
+
+	struct kvm_irqfd irq = {
+		.gsi = gsi,
+		.fd = fd,
+	};
+	if (ioctl(kvm->vm_fd, KVM_IRQFD, &irq) != 0)
+		die_perror("KVM_IRQFD failed for vsock device");
+
+	struct vhost_vring_file file = {
+		.index = vq,
+		.fd = irq.fd,
+	};
+	struct vsock_dev *vdev = dev;
+	if (ioctl(vdev->vhost_fd, VHOST_SET_VRING_CALL, &file) != 0)
+		die_perror("VHOST_SET_VRING_CALL failed for vsock device");
+	// NOTE(review): 'fd' is never stored, so it cannot be closed nor the
+	// irqfd deregistered at teardown — leaks one eventfd per queue.
+}
+
+// Transport callback: wire the queue's guest-notification ("kick")
+// eventfd into vhost, so guest kicks reach the kernel driver without a
+// userspace round trip.
+static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) {
+	if (vq > 1) {
+		// TODO(chirantan): Implement the event virtqueue
+		return;
+	}
+	struct vsock_dev *vdev = dev;
+	struct vhost_vring_file file = {
+		.index = vq,
+		.fd = efd,
+	};
+
+	// Fixed error text: the ioctl is VHOST_SET_VRING_KICK (the old
+	// message transposed the name as "VHOST_VRING_SET_KICK").
+	if (ioctl(vdev->vhost_fd, VHOST_SET_VRING_KICK, &file) != 0)
+		die_perror("VHOST_SET_VRING_KICK failed for vsock device");
+}
+
+// Fallback queue-notify hook.  Kicks are delivered in-kernel via the
+// eventfd registered in notify_vq_eventfd(), so nothing to do here.
+static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
+{
+	return 0;
+}
+
+// Return the guest page frame number backing virtqueue 'vq', as recorded
+// by init_vq().
+static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
+{
+	struct vsock_dev *vsock = dev;
+	return vsock->vqs[vq].pfn;
+}
+
+// Every vsock virtqueue has the same fixed ring size.
+static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
+{
+	return VIRTIO_VSOCK_QUEUE_SIZE;
+}
+
+// Queue resizing is not supported; just echo the requested size back to
+// the transport.
+static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
+{
+	return size;
+}
+
+// Virtio transport callbacks.  The data path lives in the kernel (vhost);
+// these hooks only shuttle configuration between the guest-visible
+// transport and the /dev/vhost-vsock fd.
+static struct virtio_ops vsock_dev_virtio_ops = {
+	.get_config = get_config,
+	.get_host_features = get_host_features,
+	.set_guest_features = set_guest_features,
+	.init_vq = init_vq,
+	.get_pfn_vq = get_pfn_vq,
+	.get_size_vq = get_size_vq,
+	.set_size_vq = set_size_vq,
+	.notify_vq = notify_vq,
+	.notify_vq_gsi = notify_vq_gsi,
+	.notify_vq_eventfd = notify_vq_eventfd,
+	.notify_status = notify_status,
+};
+
+// Create and register the vhost-backed virtio-vsock device.  Disabled (a
+// successful no-op) unless the user supplied a guest CID via --vsock.
+// Returns 0 on success or a negative errno value on failure.
+int virtio_vsock_init(struct kvm *kvm) {
+	int ret = 0;
+
+	if (kvm->cfg.guest_cid == 0)
+		return 0;
+
+	if (g_vdev != NULL) {
+		pr_err("Already initialized virtio vsock once");
+		return -EINVAL;
+	}
+
+	// calloc, not malloc: notify_status() compares vdev->status before
+	// the guest ever writes it, and the vq/feature fields are read
+	// lazily, so the whole struct must start out zeroed.
+	struct vsock_dev *vdev = calloc(1, sizeof(*vdev));
+	if (vdev == NULL)
+		return -ENOMEM;
+
+	vdev->config = (struct virtio_vsock_config) {
+		.guest_cid = kvm->cfg.guest_cid,
+	};
+	vdev->kvm = kvm;
+
+	ret = virtio_init(kvm, vdev, &vdev->dev, &vsock_dev_virtio_ops,
+			  VIRTIO_DEFAULT_TRANS(kvm), PCI_DEVICE_ID_VIRTIO_VSOCK,
+			  VIRTIO_ID_VSOCK, PCI_CLASS_VSOCK);
+	if (ret < 0)
+		goto cleanup;
+
+	vdev->vhost_fd = open("/dev/vhost-vsock", O_RDWR);
+	if (vdev->vhost_fd < 0) {
+		ret = -errno;
+		pr_err("Unable to open vhost-vsock device: %d", ret);
+		goto cleanup;
+	}
+
+	if (ioctl(vdev->vhost_fd, VHOST_SET_OWNER) != 0) {
+		ret = -errno;
+		pr_err("VHOST_SET_OWNER failed on vhost-vsock device: %d", ret);
+		goto vhost_cleanup;
+	}
+
+	if (ioctl(vdev->vhost_fd, VHOST_GET_FEATURES, &vdev->features) != 0) {
+		ret = -errno;
+		pr_err("VHOST_GET_FEATURES failed on vhost-vsock device: %d", ret);
+		goto vhost_cleanup;
+	}
+
+	// Tell vhost how to translate guest-physical addresses: all of guest
+	// RAM is a single region backed by our mapping at kvm->ram_start.
+	// calloc also zeroes the struct's padding field.
+	struct vhost_memory *mem = calloc(1, sizeof(struct vhost_memory) +
+					  sizeof(struct vhost_memory_region));
+	if (mem == NULL) {
+		ret = -ENOMEM;
+		goto vhost_cleanup;
+	}
+
+	mem->nregions = 1;
+	mem->regions[0] = (struct vhost_memory_region) {
+		.guest_phys_addr = 0,
+		.memory_size = kvm->ram_size,
+		.userspace_addr = (unsigned long) kvm->ram_start,
+	};
+	if (ioctl(vdev->vhost_fd, VHOST_SET_MEM_TABLE, mem) != 0) {
+		ret = -errno;
+		pr_err("VHOST_SET_MEM_TABLE on vhost-vsock device failed: %d", ret);
+		goto vhost_mem_cleanup;
+	}
+	// The kernel copied the table; it is not needed past this point.
+	free(mem);
+
+	if (ioctl(vdev->vhost_fd, VHOST_VSOCK_SET_GUEST_CID,
+		  &vdev->config.guest_cid) != 0) {
+		ret = -errno;
+		pr_err("VHOST_VSOCK_SET_GUEST_CID failed: %d", ret);
+		goto vhost_cleanup;
+	}
+
+	vdev->dev.use_vhost = true;
+
+	if (compat_id == -1) {
+		compat_id = virtio_compat_add_message("virtio-vsock",
+						      "CONFIG_VIRTIO_VSOCKETS");
+	}
+
+	g_vdev = vdev;
+	return 0;
+
+vhost_mem_cleanup:
+	free(mem);
+vhost_cleanup:
+	close(vdev->vhost_fd);
+cleanup:
+	// NOTE(review): the transport device registered by virtio_init() is
+	// not unregistered on these error paths.
+	free(vdev);
+
+	return ret;
+}
+virtio_dev_init(virtio_vsock_init);
+
+// Undo virtio_vsock_init(): release the vhost fd and free the device
+// state.  Safe to call when the device was never created.  Returns 0.
+int virtio_vsock_exit(struct kvm *kvm) {
+	struct vsock_dev *vdev = g_vdev;
+
+	if (vdev == NULL)
+		return 0;
+
+	g_vdev = NULL;
+	close(vdev->vhost_fd);
+	free(vdev);
+	return 0;
+}
+virtio_dev_exit(virtio_vsock_exit);