Implement virtio-vsock

Implement virtio-vsock using the vhost infrastructure provided by the kernel.

BUG=chromium:708267
TEST=Run nc-vsock inside and outside the VM and verify that data is transferred

Change-Id: Ifc2e4116e440ecfe2ea39718ead606a73dc3357f
Signed-off-by: Chirantan Ekbote <chirantan@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/495829
Reviewed-by: Dylan Reid <dgreid@chromium.org>
diff --git a/Makefile b/Makefile
index d07f2bb..bbf5d2d 100644
--- a/Makefile
+++ b/Makefile
@@ -67,6 +67,7 @@
 OBJS	+= virtio/rng.o
 OBJS    += virtio/balloon.o
 OBJS	+= virtio/pci.o
+OBJS	+= virtio/vsock.o
 OBJS	+= disk/blk.o
 OBJS	+= disk/qcow.o
 OBJS	+= disk/raw.o
diff --git a/builtin-run.c b/builtin-run.c
index 4eff4ac..71e3f2e 100644
--- a/builtin-run.c
+++ b/builtin-run.c
@@ -12,6 +12,7 @@
 #include "kvm/virtio-blk.h"
 #include "kvm/virtio-net.h"
 #include "kvm/virtio-rng.h"
+#include "kvm/virtio-vsock.h"
 #include "kvm/ioeventfd.h"
 #include "kvm/virtio-9p.h"
 #include "kvm/barrier.h"
@@ -119,6 +120,8 @@
 		     " guest", virtio_9p_rootdir_parser, kvm),		\
 	OPT_STRING('\0', "console", &(cfg)->console, "serial, virtio or"\
 			" hv", "Console to use"),			\
+	OPT_U64('\0', "vsock", &(cfg)->guest_cid, "Use vsockets"	\
+		" <cid for guest>"),				\
 	OPT_STRING('\0', "dev", &(cfg)->dev, "device_file",		\
 			"KVM device file"),				\
 	OPT_CALLBACK('\0', "tty", NULL, "tty id",			\
diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
index 386fa8c..ac2d233 100644
--- a/include/kvm/kvm-config.h
+++ b/include/kvm/kvm-config.h
@@ -27,6 +27,7 @@
 	int active_console;
 	int debug_iodelay;
 	int nrcpus;
+	u64 guest_cid;
 	const char *kernel_cmdline;
 	const char *kernel_filename;
 	const char *vmlinux_filename;
diff --git a/include/kvm/virtio-pci-dev.h b/include/kvm/virtio-pci-dev.h
index 48ae018..fae8644 100644
--- a/include/kvm/virtio-pci-dev.h
+++ b/include/kvm/virtio-pci-dev.h
@@ -15,6 +15,7 @@
 #define PCI_DEVICE_ID_VIRTIO_BLN		0x1005
 #define PCI_DEVICE_ID_VIRTIO_SCSI		0x1008
 #define PCI_DEVICE_ID_VIRTIO_9P			0x1009
+#define PCI_DEVICE_ID_VIRTIO_VSOCK		0x1013
 #define PCI_DEVICE_ID_VESA			0x2000
 #define PCI_DEVICE_ID_PCI_SHMEM			0x0001
 
@@ -34,5 +35,6 @@
 #define PCI_CLASS_RNG				0xff0000
 #define PCI_CLASS_BLN				0xff0000
 #define PCI_CLASS_9P				0xff0000
+#define PCI_CLASS_VSOCK				0xff0000
 
 #endif /* VIRTIO_PCI_DEV_H_ */
diff --git a/include/kvm/virtio-vsock.h b/include/kvm/virtio-vsock.h
new file mode 100644
index 0000000..c448e67
--- /dev/null
+++ b/include/kvm/virtio-vsock.h
@@ -0,0 +1,9 @@
+#ifndef KVM__VIRTIO_VSOCK_H
+#define KVM__VIRTIO_VSOCK_H
+
+struct kvm;
+/* init: 0 on success or when no guest CID is set, negative errno on failure. */
+int virtio_vsock_init(struct kvm *kvm);
+int virtio_vsock_exit(struct kvm *kvm);
+
+#endif  // KVM__VIRTIO_VSOCK_H
diff --git a/include/linux/vhost.h b/include/linux/vhost.h
index bb6a5b4..9e160c3 100644
--- a/include/linux/vhost.h
+++ b/include/linux/vhost.h
@@ -155,4 +155,9 @@
 #define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
 #define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
 
+/* VHOST_VSOCK specific defines */
+
+#define VHOST_VSOCK_SET_GUEST_CID	_IOW(VHOST_VIRTIO, 0x60, __u64)
+#define VHOST_VSOCK_SET_RUNNING		_IOW(VHOST_VIRTIO, 0x61, int)
+
 #endif
diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h
index 5f60aa4..1570134 100644
--- a/include/linux/virtio_ids.h
+++ b/include/linux/virtio_ids.h
@@ -40,5 +40,6 @@
 #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
 #define VIRTIO_ID_CAIF	       12 /* Virtio caif */
 #define VIRTIO_ID_INPUT        18 /* virtio input */
+#define VIRTIO_ID_VSOCK        19 /* virtual sockets */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/virtio/vsock.c b/virtio/vsock.c
new file mode 100644
index 0000000..5fe1b71
--- /dev/null
+++ b/virtio/vsock.c
@@ -0,0 +1,317 @@
+#include "kvm/virtio-vsock.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/kvm.h>
+#include <linux/vhost.h>
+#include <linux/virtio_config.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include "kvm/guest_compat.h"
+#include "kvm/kvm.h"
+#include "kvm/virtio.h"
+#include "kvm/virtio-pci-dev.h"
+#include "kvm/util.h"
+
+#define VIRTIO_VSOCK_QUEUE_SIZE 256	/* ring entries used for every queue */
+#define VIRTIO_VSOCK_NUM_QUEUES 3	/* 0-1 go to vhost; 2 is the event queue (TODO) */
+
+static int compat_id = -1;	/* -1 until virtio_vsock_init() adds the compat message */
+static struct vsock_dev *g_vdev = NULL;	/* the single device instance, if any */
+
+struct virtio_vsock_config {
+	u64 guest_cid;	/* copied from kvm->cfg.guest_cid at init */
+};
+
+struct vsock_dev {
+	struct virt_queue vqs[VIRTIO_VSOCK_NUM_QUEUES];
+	struct virtio_vsock_config config;	/* exposed through get_config() */
+
+	u64 features;	/* from VHOST_GET_FEATURES, later the guest-acked set */
+	int vhost_fd;	/* open descriptor on /dev/vhost-vsock */
+	u8 status;	/* last value passed to notify_status() */
+
+	struct virtio_device dev;
+	struct kvm *kvm;
+};
+
+/* Return the device config space (the guest CID) as a raw byte pointer. */
+static u8 *get_config(struct kvm *kvm, void *dev) {
+	struct vsock_dev *vsock = dev;
+	return (u8 *)&vsock->config;
+}
+
+/* Report the cached feature mask; the u64 field is narrowed to u32. */
+static u32 get_host_features(struct kvm *kvm, void *dev) {
+	return ((struct vsock_dev *)dev)->features;
+}
+
+/* Pass the given feature mask to vhost and cache it only if vhost accepts it. */
+static void set_guest_features(struct kvm *kvm, void *dev, u32 features) {
+	struct vsock_dev *vsock = dev;
+	u64 acked = features;	/* widened: the ioctl is handed a u64 */
+
+	if (ioctl(vsock->vhost_fd, VHOST_SET_FEATURES, &acked) == 0) {
+		vsock->features = features;
+		return;
+	}
+	pr_err("Unable to set vhost features for virtio-vsock");
+}
+
+/*
+ * Record the new device status and start or stop the vhost backend whenever
+ * the DRIVER_OK bit flips. A failed toggle is fatal.
+ */
+static void notify_status(struct kvm *kvm, void *dev, u8 status) {
+	struct vsock_dev *vsock = dev;
+	int was_ok = (vsock->status & VIRTIO_CONFIG_S_DRIVER_OK) != 0;
+	int now_ok = (status & VIRTIO_CONFIG_S_DRIVER_OK) != 0;
+
+	if (was_ok != now_ok) {
+		// 1: the driver was just enabled, 0: it was just disabled.
+		int running = now_ok;
+
+		if (ioctl(vsock->vhost_fd, VHOST_VSOCK_SET_RUNNING,
+			  &running) != 0)
+			die_perror("VHOST_VSOCK_SET_RUNNING failed");
+	}
+
+	vsock->status = status;
+}
+
+// Set up queue `vq` on the guest page frame `pfn` and, for queues 0 and 1,
+// hand the ring to vhost. Returns 0 on success or a negative errno value.
+static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
+		u32 pfn) {
+	struct vsock_dev *vdev = dev;
+	int ret;
+
+	// Refuse indices outside vqs[] instead of writing out of bounds.
+	if (vq >= VIRTIO_VSOCK_NUM_QUEUES)
+		return -EINVAL;
+	compat__remove_message(compat_id);
+
+	struct virt_queue *queue = &vdev->vqs[vq];
+	queue->pfn = pfn;
+	void *p = virtio_get_vq(kvm, queue->pfn, page_size);
+	vring_init(&queue->vring, VIRTIO_VSOCK_QUEUE_SIZE, p, align);
+
+	if (vq > 1) {
+		// TODO(chirantan): Implement the event virtqueue
+		return 0;
+	}
+
+	struct vhost_vring_state state = { .index = vq, .num = queue->vring.num };
+	if (ioctl(vdev->vhost_fd, VHOST_SET_VRING_NUM, &state) != 0) {
+		ret = -errno;
+		pr_err("VHOST_SET_VRING_NUM failed for vsock device: %d", ret);
+		return ret;
+	}
+
+	state.num = 0;	// reuse `state` to set the ring base to 0
+	if (ioctl(vdev->vhost_fd, VHOST_SET_VRING_BASE, &state) != 0) {
+		ret = -errno;
+		pr_err("VHOST_SET_VRING_BASE failed for vsock device: %d", ret);
+		return ret;
+	}
+
+	struct vhost_vring_addr addr = {	// via unsigned long: clean on 32-bit
+		.index = vq,
+		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
+		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
+		.used_user_addr = (u64)(unsigned long)queue->vring.used,
+	};
+	if (ioctl(vdev->vhost_fd, VHOST_SET_VRING_ADDR, &addr) != 0) {
+		ret = -errno;
+		pr_err("VHOST_SET_VRING_ADDR failed for vsock device: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Wire up interrupts for queue `vq`: create an eventfd, bind it to `gsi` with
+ * KVM_IRQFD and register it as the vhost call fd. Any failure is fatal.
+ */
+static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) {
+	struct vsock_dev *vsock = dev;
+
+	// TODO(chirantan): Implement the event virtqueue
+	if (vq > 1)
+		return;
+
+	int call_fd = eventfd(0, 0);
+	if (call_fd < 0)
+		die_perror("Unable to create eventfd");  // no graceful way out
+
+	struct kvm_irqfd irqfd = { .gsi = gsi, .fd = call_fd };
+	if (ioctl(kvm->vm_fd, KVM_IRQFD, &irqfd) != 0)
+		die_perror("KVM_IRQFD failed for vsock device");
+
+	struct vhost_vring_file call = {
+		.index = vq,
+		.fd = irqfd.fd,
+	};
+	if (ioctl(vsock->vhost_fd, VHOST_SET_VRING_CALL, &call) != 0)
+		die_perror("VHOST_SET_VRING_CALL failed for vsock device");
+}
+
+// Register `efd` with vhost as the kick eventfd of queue `vq`. Only queues
+// 0 and 1 are handled; the event queue is skipped. A failed registration
+// is fatal.
+static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) {
+	if (vq > 1) {
+		// TODO(chirantan): Implement the event virtqueue
+		return;
+	}
+	struct vsock_dev *vdev = dev;
+	struct vhost_vring_file file = { .index = vq, .fd = efd };
+
+	if (ioctl(vdev->vhost_fd, VHOST_SET_VRING_KICK, &file) != 0)
+		die_perror("VHOST_SET_VRING_KICK failed for vsock device");
+}
+
+// No work is done here (presumably kicks reach vhost via the kick eventfd).
+static int notify_vq(struct kvm *kvm, void *dev, u32 vq) {
+	return 0;
+}
+
+// Return the page frame number recorded for queue `vq` by init_vq(), or 0
+// when `vq` lies outside vqs[] (instead of reading out of bounds).
+static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq) {
+	struct vsock_dev *vdev = dev;
+	return vq < VIRTIO_VSOCK_NUM_QUEUES ? (int)vdev->vqs[vq].pfn : 0;
+}
+
+static int get_size_vq(struct kvm *kvm, void *dev, u32 vq) {
+	return VIRTIO_VSOCK_QUEUE_SIZE;	// same fixed size whatever `vq` is
+}
+
+// Resizing is not implemented: nothing is stored and the requested `size`
+// is simply echoed back to the caller.
+static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size) {
+	return size;
+}
+
+static struct virtio_ops vsock_dev_virtio_ops = {	/* given to virtio_init() */
+	.get_config = get_config,
+	.get_host_features = get_host_features,
+	.set_guest_features = set_guest_features,
+	.notify_status = notify_status,
+	.init_vq = init_vq,
+	.get_pfn_vq = get_pfn_vq,
+	.get_size_vq = get_size_vq,
+	.set_size_vq = set_size_vq,
+	.notify_vq = notify_vq,
+	.notify_vq_gsi = notify_vq_gsi,
+	.notify_vq_eventfd = notify_vq_eventfd,
+};
+
+// Create the vhost-backed virtio-vsock device when a guest CID is configured.
+// Returns 0 on success or when vsock is disabled, a negative errno otherwise.
+int virtio_vsock_init(struct kvm *kvm) {
+	int ret = 0;
+
+	if (kvm->cfg.guest_cid == 0)
+		return 0;
+
+	if (g_vdev != NULL) {
+		pr_err("Already initialized virtio vsock once");
+		return -EINVAL;
+	}
+
+	// Zeroed so notify_status() never compares against a garbage status byte.
+	struct vsock_dev *vdev = calloc(1, sizeof(*vdev));
+	if (vdev == NULL)
+		return -ENOMEM;
+
+	vdev->config.guest_cid = kvm->cfg.guest_cid;
+	vdev->kvm = kvm;
+
+	vdev->vhost_fd = open("/dev/vhost-vsock", O_RDWR);
+	if (vdev->vhost_fd < 0) {
+		ret = -errno;
+		pr_err("Unable to open vhost-vsock device: %d", ret);
+		goto cleanup;
+	}
+
+	if (ioctl(vdev->vhost_fd, VHOST_SET_OWNER) != 0) {
+		ret = -errno;
+		pr_err("VHOST_SET_OWNER failed on vhost-vsock device: %d", ret);
+		goto vhost_cleanup;
+	}
+
+	if (ioctl(vdev->vhost_fd, VHOST_GET_FEATURES, &vdev->features) != 0) {
+		ret = -errno;
+		pr_err("VHOST_GET_FEATURES failed on vhost-vsock device: %d", ret);
+		goto vhost_cleanup;
+	}
+
+	struct vhost_memory *mem = malloc(sizeof(struct vhost_memory) +
+					  sizeof(struct vhost_memory_region));
+	if (mem == NULL) {
+		ret = -ENOMEM;
+		goto vhost_cleanup;
+	}
+
+	mem->nregions = 1;
+	mem->regions[0] = (struct vhost_memory_region) {
+		.guest_phys_addr = 0,
+		.memory_size = kvm->ram_size,
+		.userspace_addr = (unsigned long) kvm->ram_start,
+	};
+	if (ioctl(vdev->vhost_fd, VHOST_SET_MEM_TABLE, mem) != 0) {
+		ret = -errno;
+		pr_err("VHOST_SET_MEM_TABLE on vhost-vsock device failed: %d", ret);
+		goto vhost_mem_cleanup;
+	}
+	free(mem);  // not needed again once the ioctl has returned
+
+	if (ioctl(vdev->vhost_fd, VHOST_VSOCK_SET_GUEST_CID,
+		  &vdev->config.guest_cid) != 0) {
+		ret = -errno;
+		pr_err("VHOST_VSOCK_SET_GUEST_CID failed: %d", ret);
+		goto vhost_cleanup;
+	}
+
+	// Register last so the failure paths above never free a registered device.
+	ret = virtio_init(kvm, vdev, &vdev->dev, &vsock_dev_virtio_ops,
+			  VIRTIO_DEFAULT_TRANS(kvm), PCI_DEVICE_ID_VIRTIO_VSOCK,
+			  VIRTIO_ID_VSOCK, PCI_CLASS_VSOCK);
+	if (ret < 0)
+		goto vhost_cleanup;
+
+	vdev->dev.use_vhost = true;
+	if (compat_id == -1) {
+		compat_id = virtio_compat_add_message("virtio-vsock",
+						      "CONFIG_VIRTIO_VSOCKETS");
+	}
+
+	g_vdev = vdev;
+	return 0;
+
+vhost_mem_cleanup:
+	free(mem);
+vhost_cleanup:
+	close(vdev->vhost_fd);
+cleanup:
+	free(vdev);
+	return ret;
+}
+virtio_dev_init(virtio_vsock_init);
+
+// Tear down the device created by virtio_vsock_init(), if there is one: close
+// the vhost fd, free the state and clear the global pointer. Always returns 0.
+int virtio_vsock_exit(struct kvm *kvm) {
+	struct vsock_dev *vdev = g_vdev;
+
+	if (vdev != NULL) {
+		g_vdev = NULL;
+		close(vdev->vhost_fd);
+		free(vdev);
+	}
+	return 0;
+}
+virtio_dev_exit(virtio_vsock_exit);