Merge git://git.kernel.org/pub/scm/linux/kernel/git/will/kvmtool
Pull in the latest commits from upstream.
BUG=None
TEST=precq passes
Change-Id: I04a0fc1cc251c917ea7439ea79d32f42a48ba64a
diff --git a/Makefile b/Makefile
index d07f2bb..75d83b6 100644
--- a/Makefile
+++ b/Makefile
@@ -137,7 +137,6 @@
DEFINES += -DCONFIG_PPC
OBJS += powerpc/boot.o
OBJS += powerpc/ioport.o
- OBJS += powerpc/irq.o
OBJS += powerpc/kvm.o
OBJS += powerpc/cpu_info.o
OBJS += powerpc/kvm-cpu.o
@@ -152,7 +151,7 @@
endif
# ARM
-OBJS_ARM_COMMON := arm/fdt.o arm/gic.o arm/ioport.o arm/irq.o \
+OBJS_ARM_COMMON := arm/fdt.o arm/gic.o arm/gicv2m.o arm/ioport.o \
arm/kvm.o arm/kvm-cpu.o arm/pci.o arm/timer.o \
arm/pmu.o
HDRS_ARM_COMMON := arm/include
@@ -185,7 +184,6 @@
ARCH_INCLUDE := mips/include
OBJS += mips/kvm.o
OBJS += mips/kvm-cpu.o
- OBJS += mips/irq.o
endif
###
@@ -367,7 +365,7 @@
CFLAGS += -Werror
endif
-all: $(PROGRAM) $(PROGRAM_ALIAS) $(GUEST_INIT) $(GUEST_PRE_INIT)
+all: $(PROGRAM) $(PROGRAM_ALIAS)
# CFLAGS used when building objects
# This is intentionally not assigned using :=
@@ -384,11 +382,11 @@
STATIC_DEPS := $(foreach obj,$(STATIC_OBJS),\
$(subst $(comma),_,$(dir $(obj)).$(notdir $(obj)).d))
-$(PROGRAM)-static: $(STATIC_OBJS) $(OTHEROBJS) $(GUEST_INIT) $(GUEST_PRE_INIT)
+$(PROGRAM)-static: $(STATIC_OBJS) $(OTHEROBJS) $(GUEST_OBJS)
$(E) " LINK " $@
$(Q) $(CC) -static $(CFLAGS) $(STATIC_OBJS) $(OTHEROBJS) $(GUEST_OBJS) $(LDFLAGS) $(LIBS) $(LIBS_STATOPT) -o $@
-$(PROGRAM): $(OBJS) $(OBJS_DYNOPT) $(OTHEROBJS) $(GUEST_INIT) $(GUEST_PRE_INIT)
+$(PROGRAM): $(OBJS) $(OBJS_DYNOPT) $(OTHEROBJS) $(GUEST_OBJS)
$(E) " LINK " $@
$(Q) $(CC) $(CFLAGS) $(OBJS) $(OBJS_DYNOPT) $(OTHEROBJS) $(GUEST_OBJS) $(LDFLAGS) $(LIBS) $(LIBS_DYNOPT) -o $@
@@ -398,15 +396,21 @@
ifneq ($(ARCH_PRE_INIT),)
$(GUEST_PRE_INIT): $(ARCH_PRE_INIT)
- $(E) " LINK " $@
- $(Q) $(CC) -s $(PIE_FLAGS) -nostdlib $(ARCH_PRE_INIT) -o $@
- $(Q) $(LD) -r -b binary -o guest/guest_pre_init.o $(GUEST_PRE_INIT)
+ $(E) " COMPILE " $@
+ $(Q) $(CC) -s $(PIE_FLAGS) -nostdlib $< -o $@
+
+guest/guest_pre_init.c: $(GUEST_PRE_INIT)
+ $(E) " CONVERT " $@
+ $(Q) $(call binary-to-C,$<,pre_init_binary,$@)
endif
$(GUEST_INIT): guest/init.c
- $(E) " LINK " $@
- $(Q) $(CC) $(GUEST_INIT_FLAGS) guest/init.c -o $@
- $(Q) $(LD) -r -b binary -o guest/guest_init.o $(GUEST_INIT)
+ $(E) " COMPILE " $@
+ $(Q) $(CC) $(GUEST_INIT_FLAGS) $< -o $@
+
+guest/guest_init.c: $(GUEST_INIT)
+ $(E) " CONVERT " $@
+ $(Q) $(call binary-to-C,$<,init_binary,$@)
%.s: %.c
$(Q) $(CC) -o $@ -S $(CFLAGS) -fverbose-asm $<
@@ -497,6 +501,7 @@
$(Q) rm -f tests/boot/boot_test.iso
$(Q) rm -rf tests/boot/rootfs/
$(Q) rm -f $(DEPS) $(STATIC_DEPS) $(OBJS) $(OTHEROBJS) $(OBJS_DYNOPT) $(STATIC_OBJS) $(PROGRAM) $(PROGRAM_ALIAS) $(PROGRAM)-static $(GUEST_INIT) $(GUEST_PRE_INIT) $(GUEST_OBJS)
+ $(Q) rm -f guest/guest_init.c guest/guest_pre_init.c
$(Q) rm -f cscope.*
$(Q) rm -f tags
$(Q) rm -f TAGS
diff --git a/README b/README
index 5501f05..52124b8 100644
--- a/README
+++ b/README
@@ -104,5 +104,10 @@
Contributing
------------
-Please send patches for kvmtool to kvm@vger.kernel.org , in the usual git patch
-format. Include "kvmtool" in the mail subject.
+Please send patches for kvmtool to kvm@vger.kernel.org, in the usual git
+patch format, including "kvmtool" in the mail subject. "kvmtool" can be
+added automatically by issuing the command
+
+ git config format.subjectprefix "PATCH kvmtool"
+
+in the git repository.
diff --git a/arm/aarch32/arm-cpu.c b/arm/aarch32/arm-cpu.c
index d8d6293..16bba55 100644
--- a/arm/aarch32/arm-cpu.c
+++ b/arm/aarch32/arm-cpu.c
@@ -8,11 +8,11 @@
#include <linux/byteorder.h>
#include <linux/types.h>
-static void generate_fdt_nodes(void *fdt, struct kvm *kvm, u32 gic_phandle)
+static void generate_fdt_nodes(void *fdt, struct kvm *kvm)
{
int timer_interrupts[4] = {13, 14, 11, 10};
- gic__generate_fdt_nodes(fdt, gic_phandle, IRQCHIP_GICV2);
+ gic__generate_fdt_nodes(fdt, kvm->cfg.arch.irqchip);
timer__generate_fdt_nodes(fdt, kvm, timer_interrupts);
}
diff --git a/arm/aarch32/include/asm/kvm.h b/arm/aarch32/include/asm/kvm.h
index df3f60c..0220667 100644
--- a/arm/aarch32/include/asm/kvm.h
+++ b/arm/aarch32/include/asm/kvm.h
@@ -84,6 +84,17 @@
#define KVM_VGIC_V2_DIST_SIZE 0x1000
#define KVM_VGIC_V2_CPU_SIZE 0x2000
+#define KVM_VGIC_V2M_SIZE 0x1000
+
+/* Supported VGICv3 address types */
+#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
+#define KVM_VGIC_ITS_ADDR_TYPE 4
+
+#define KVM_VGIC_V3_DIST_SIZE SZ_64K
+#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K)
+
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */
@@ -139,8 +150,8 @@
#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1)
-#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
-#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
+#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
+#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
/* Normal registers are mapped as coprocessor 16. */
#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
@@ -172,10 +183,23 @@
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+ (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
/* KVM_IRQ_LINE irq field index values */
diff --git a/arm/aarch32/include/kvm/fdt-arch.h b/arm/aarch32/include/kvm/fdt-arch.h
new file mode 100644
index 0000000..e448bf1
--- /dev/null
+++ b/arm/aarch32/include/kvm/fdt-arch.h
@@ -0,0 +1,6 @@
+#ifndef KVM__KVM_FDT_H
+#define KVM__KVM_FDT_H
+
+#include "arm-common/fdt-arch.h"
+
+#endif /* KVM__KVM_FDT_H */
diff --git a/arm/aarch32/include/kvm/kvm-arch.h b/arm/aarch32/include/kvm/kvm-arch.h
index 1632e3c..cd31e72 100644
--- a/arm/aarch32/include/kvm/kvm-arch.h
+++ b/arm/aarch32/include/kvm/kvm-arch.h
@@ -1,9 +1,6 @@
#ifndef KVM__KVM_ARCH_H
#define KVM__KVM_ARCH_H
-#define ARM_GIC_DIST_SIZE 0x1000
-#define ARM_GIC_CPUI_SIZE 0x2000
-
#define ARM_KERN_OFFSET(...) 0x8000
#define ARM_MAX_MEMORY(...) ARM_LOMAP_MAX_MEMORY
diff --git a/arm/aarch64/arm-cpu.c b/arm/aarch64/arm-cpu.c
index c21c0bb..d7572b7 100644
--- a/arm/aarch64/arm-cpu.c
+++ b/arm/aarch64/arm-cpu.c
@@ -10,10 +10,11 @@
#include <linux/byteorder.h>
#include <linux/types.h>
-static void generate_fdt_nodes(void *fdt, struct kvm *kvm, u32 gic_phandle)
+static void generate_fdt_nodes(void *fdt, struct kvm *kvm)
{
int timer_interrupts[4] = {13, 14, 11, 10};
- gic__generate_fdt_nodes(fdt, gic_phandle, kvm->cfg.arch.irqchip);
+
+ gic__generate_fdt_nodes(fdt, kvm->cfg.arch.irqchip);
timer__generate_fdt_nodes(fdt, kvm, timer_interrupts);
pmu__generate_fdt_nodes(fdt, kvm);
}
diff --git a/arm/aarch64/include/asm/kvm.h b/arm/aarch64/include/asm/kvm.h
index f209ea1..7d14507 100644
--- a/arm/aarch64/include/asm/kvm.h
+++ b/arm/aarch64/include/asm/kvm.h
@@ -84,12 +84,16 @@
#define KVM_VGIC_V2_DIST_SIZE 0x1000
#define KVM_VGIC_V2_CPU_SIZE 0x2000
+#define KVM_VGIC_V2M_SIZE 0x1000
+
/* Supported VGICv3 address types */
#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
+#define KVM_VGIC_ITS_ADDR_TYPE 4
#define KVM_VGIC_V3_DIST_SIZE SZ_64K
#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K)
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
@@ -199,10 +203,23 @@
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+ (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
/* Device Control API on vcpu fd */
diff --git a/arm/aarch64/include/kvm/fdt-arch.h b/arm/aarch64/include/kvm/fdt-arch.h
new file mode 100644
index 0000000..e448bf1
--- /dev/null
+++ b/arm/aarch64/include/kvm/fdt-arch.h
@@ -0,0 +1,6 @@
+#ifndef KVM__KVM_FDT_H
+#define KVM__KVM_FDT_H
+
+#include "arm-common/fdt-arch.h"
+
+#endif /* KVM__KVM_FDT_H */
diff --git a/arm/aarch64/include/kvm/kvm-arch.h b/arm/aarch64/include/kvm/kvm-arch.h
index 4925736..9de623a 100644
--- a/arm/aarch64/include/kvm/kvm-arch.h
+++ b/arm/aarch64/include/kvm/kvm-arch.h
@@ -1,9 +1,6 @@
#ifndef KVM__KVM_ARCH_H
#define KVM__KVM_ARCH_H
-#define ARM_GIC_DIST_SIZE 0x10000
-#define ARM_GIC_CPUI_SIZE 0x20000
-
#define ARM_KERN_OFFSET(kvm) ((kvm)->cfg.arch.aarch32_guest ? \
0x8000 : \
0x80000)
diff --git a/arm/aarch64/include/kvm/kvm-config-arch.h b/arm/aarch64/include/kvm/kvm-config-arch.h
index 5ef1f17..04be43d 100644
--- a/arm/aarch64/include/kvm/kvm-config-arch.h
+++ b/arm/aarch64/include/kvm/kvm-config-arch.h
@@ -5,7 +5,10 @@
OPT_BOOLEAN('\0', "aarch32", &(cfg)->aarch32_guest, \
"Run AArch32 guest"), \
OPT_BOOLEAN('\0', "pmu", &(cfg)->has_pmuv3, \
- "Create PMUv3 device"),
+ "Create PMUv3 device"), \
+ OPT_U64('\0', "kaslr-seed", &(cfg)->kaslr_seed, \
+ "Specify random seed for Kernel Address Space " \
+ "Layout Randomization (KASLR)"),
#include "arm-common/kvm-config-arch.h"
diff --git a/arm/fdt.c b/arm/fdt.c
index 381d48f..980015b 100644
--- a/arm/fdt.c
+++ b/arm/fdt.c
@@ -40,7 +40,7 @@
close(fd);
}
-#define CPU_NAME_MAX_LEN 8
+#define CPU_NAME_MAX_LEN 15
static void generate_cpu_nodes(void *fdt, struct kvm *kvm)
{
int cpu;
@@ -114,7 +114,6 @@
{
struct device_header *dev_hdr;
u8 staging_fdt[FDT_MAX_SIZE];
- u32 gic_phandle = fdt__alloc_phandle();
u64 mem_reg_prop[] = {
cpu_to_fdt64(kvm->arch.memory_guest_start),
cpu_to_fdt64(kvm->ram_size),
@@ -125,7 +124,7 @@
kvm->arch.dtb_guest_start);
void (*generate_mmio_fdt_nodes)(void *, struct device_header *,
void (*)(void *, u8, enum irq_type));
- void (*generate_cpu_peripheral_fdt_nodes)(void *, struct kvm *, u32)
+ void (*generate_cpu_peripheral_fdt_nodes)(void *, struct kvm *)
= kvm->cpus[0]->generate_fdt_nodes;
/* Create new tree without a reserve map */
@@ -134,7 +133,7 @@
/* Header */
_FDT(fdt_begin_node(fdt, ""));
- _FDT(fdt_property_cell(fdt, "interrupt-parent", gic_phandle));
+ _FDT(fdt_property_cell(fdt, "interrupt-parent", PHANDLE_GIC));
_FDT(fdt_property_string(fdt, "compatible", "linux,dummy-virt"));
_FDT(fdt_property_cell(fdt, "#address-cells", 0x2));
_FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
@@ -143,6 +142,7 @@
_FDT(fdt_begin_node(fdt, "chosen"));
_FDT(fdt_property_cell(fdt, "linux,pci-probe-only", 1));
_FDT(fdt_property_string(fdt, "bootargs", kvm->cfg.real_cmdline));
+ _FDT(fdt_property_u64(fdt, "kaslr-seed", kvm->cfg.arch.kaslr_seed));
/* Initrd */
if (kvm->arch.initrd_size != 0) {
@@ -166,7 +166,7 @@
/* CPU and peripherals (interrupt controller, timers, etc) */
generate_cpu_nodes(fdt, kvm);
if (generate_cpu_peripheral_fdt_nodes)
- generate_cpu_peripheral_fdt_nodes(fdt, kvm, gic_phandle);
+ generate_cpu_peripheral_fdt_nodes(fdt, kvm);
/* Virtio MMIO devices */
dev_hdr = device__first_dev(DEVICE_BUS_MMIO);
@@ -185,7 +185,7 @@
}
/* PCI host controller */
- pci__generate_fdt_nodes(fdt, gic_phandle);
+ pci__generate_fdt_nodes(fdt);
/* PSCI firmware */
_FDT(fdt_begin_node(fdt, "psci"));
diff --git a/arm/gic.c b/arm/gic.c
index d6d6dd0..aca0b93 100644
--- a/arm/gic.c
+++ b/arm/gic.c
@@ -8,19 +8,15 @@
#include <linux/byteorder.h>
#include <linux/kernel.h>
#include <linux/kvm.h>
+#include <linux/sizes.h>
-/* Those names are not defined for ARM (yet) */
-#ifndef KVM_VGIC_V3_ADDR_TYPE_DIST
-#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
-#endif
-
-#ifndef KVM_VGIC_V3_ADDR_TYPE_REDIST
-#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
-#endif
+#define IRQCHIP_GIC 0
static int gic_fd = -1;
static u64 gic_redists_base;
static u64 gic_redists_size;
+static u64 gic_msi_base;
+static u64 gic_msi_size = 0;
int irqchip_parser(const struct option *opt, const char *arg, int unset)
{
@@ -28,8 +24,12 @@
if (!strcmp(arg, "gicv2")) {
*type = IRQCHIP_GICV2;
+ } else if (!strcmp(arg, "gicv2m")) {
+ *type = IRQCHIP_GICV2M;
} else if (!strcmp(arg, "gicv3")) {
*type = IRQCHIP_GICV3;
+ } else if (!strcmp(arg, "gicv3-its")) {
+ *type = IRQCHIP_GICV3_ITS;
} else {
pr_err("irqchip: unknown type \"%s\"\n", arg);
return -1;
@@ -38,6 +38,86 @@
return 0;
}
+/*
+ * Pre-populate the global irq_routing table with one
+ * KVM_IRQ_ROUTING_IRQCHIP entry per interrupt line, where the number of
+ * lines is irq__get_nr_allocated_lines() passed through ALIGN(..., 32).
+ * Entry n maps GSI n to pin n of IRQCHIP_GIC.
+ *
+ * The global next_gsi is used as the loop counter, so after a successful
+ * run it equals the number of entries created here. The kvm argument is
+ * not used by this function.
+ *
+ * Returns 0 on success, or the non-zero value returned by
+ * irq__allocate_routing_entry() as soon as one allocation fails.
+ */
+static int irq__routing_init(struct kvm *kvm)
+{
+ int r;
+ int irqlines = ALIGN(irq__get_nr_allocated_lines(), 32);
+
+ /*
+ * This describes the default routing that the kernel uses without
+ * any routing explicitly set up via KVM_SET_GSI_ROUTING. So we
+ * don't need to commit these setting right now. The first actual
+ * user (MSI routing) will engage these mappings then.
+ */
+ for (next_gsi = 0; next_gsi < irqlines; next_gsi++) {
+ r = irq__allocate_routing_entry();
+ if (r)
+ return r;
+
+ irq_routing->entries[irq_routing->nr++] =
+ (struct kvm_irq_routing_entry) {
+ .gsi = next_gsi,
+ .type = KVM_IRQ_ROUTING_IRQCHIP,
+ .u.irqchip.irqchip = IRQCHIP_GIC,
+ .u.irqchip.pin = next_gsi,
+ };
+ }
+
+ return 0;
+}
+
+/*
+ * Create an in-kernel GICv3 ITS device and place it at its_frame_addr.
+ *
+ * The device is created on the VM fd, its address is set through the
+ * KVM_DEV_ARM_VGIC_GRP_ADDR / KVM_VGIC_ITS_ADDR_TYPE attribute, and it is
+ * then initialised with KVM_DEV_ARM_VGIC_CTRL_INIT.
+ *
+ * Returns 0 on success, or the failing ioctl()'s return value. Once the
+ * device has been created, every failure path closes its fd again so a
+ * failed setup does not leak a descriptor. On success the fd is left
+ * open; it is not stored anywhere by this function.
+ */
+static int gic__create_its_frame(struct kvm *kvm, u64 its_frame_addr)
+{
+	struct kvm_create_device its_device = {
+		.type	= KVM_DEV_TYPE_ARM_VGIC_ITS,
+		.flags	= 0,
+	};
+	struct kvm_device_attr its_attr = {
+		.group	= KVM_DEV_ARM_VGIC_GRP_ADDR,
+		.attr	= KVM_VGIC_ITS_ADDR_TYPE,
+		.addr	= (u64)(unsigned long)&its_frame_addr,
+	};
+	struct kvm_device_attr its_init_attr = {
+		.group	= KVM_DEV_ARM_VGIC_GRP_CTRL,
+		.attr	= KVM_DEV_ARM_VGIC_CTRL_INIT,
+	};
+	int err;
+
+	err = ioctl(kvm->vm_fd, KVM_CREATE_DEVICE, &its_device);
+	if (err) {
+		fprintf(stderr,
+			"GICv3 ITS requested, but kernel does not support it.\n");
+		fprintf(stderr, "Try --irqchip=gicv3 instead\n");
+		return err;
+	}
+
+	/* Probe for the address attribute before trying to set it. */
+	err = ioctl(its_device.fd, KVM_HAS_DEVICE_ATTR, &its_attr);
+	if (err)
+		goto out_close;
+
+	err = ioctl(its_device.fd, KVM_SET_DEVICE_ATTR, &its_attr);
+	if (err)
+		goto out_close;
+
+	err = ioctl(its_device.fd, KVM_SET_DEVICE_ATTR, &its_init_attr);
+	if (err)
+		goto out_close;
+
+	return 0;
+
+out_close:
+	/* Do not leak the half-configured device on any failure path. */
+	close(its_device.fd);
+	return err;
+}
+
+/*
+ * Create the MSI frame that belongs to the selected irqchip type at
+ * msi_frame_addr. Types without an MSI frame succeed without doing
+ * anything. Returns the result of the type-specific helper, or 0.
+ */
+static int gic__create_msi_frame(struct kvm *kvm, enum irqchip_type type,
+				 u64 msi_frame_addr)
+{
+	if (type == IRQCHIP_GICV2M)
+		return gic__create_gicv2m_frame(kvm, msi_frame_addr);
+
+	if (type == IRQCHIP_GICV3_ITS)
+		return gic__create_its_frame(kvm, msi_frame_addr);
+
+	/* No MSI frame needed */
+	return 0;
+}
+
static int gic__create_device(struct kvm *kvm, enum irqchip_type type)
{
int err;
@@ -62,11 +142,13 @@
};
switch (type) {
+ case IRQCHIP_GICV2M:
case IRQCHIP_GICV2:
gic_device.type = KVM_DEV_TYPE_ARM_VGIC_V2;
dist_attr.attr = KVM_VGIC_V2_ADDR_TYPE_DIST;
break;
case IRQCHIP_GICV3:
+ case IRQCHIP_GICV3_ITS:
gic_device.type = KVM_DEV_TYPE_ARM_VGIC_V3;
dist_attr.attr = KVM_VGIC_V3_ADDR_TYPE_DIST;
break;
@@ -79,9 +161,11 @@
gic_fd = gic_device.fd;
switch (type) {
+ case IRQCHIP_GICV2M:
case IRQCHIP_GICV2:
err = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &cpu_if_attr);
break;
+ case IRQCHIP_GICV3_ITS:
case IRQCHIP_GICV3:
err = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &redist_attr);
break;
@@ -93,6 +177,10 @@
if (err)
goto out_err;
+ err = gic__create_msi_frame(kvm, type, gic_msi_base);
+ if (err)
+ goto out_err;
+
return 0;
out_err:
@@ -134,11 +222,20 @@
int err;
switch (type) {
+ case IRQCHIP_GICV2M:
+ gic_msi_size = KVM_VGIC_V2M_SIZE;
+ gic_msi_base = ARM_GIC_DIST_BASE - gic_msi_size;
+ break;
case IRQCHIP_GICV2:
break;
+ case IRQCHIP_GICV3_ITS:
+ /* We reserve the 64K page with the doorbell as well. */
+ gic_msi_size = KVM_VGIC_V3_ITS_SIZE + SZ_64K;
+ /* fall through */
case IRQCHIP_GICV3:
gic_redists_size = kvm->cfg.nrcpus * ARM_GIC_REDIST_SIZE;
gic_redists_base = ARM_GIC_DIST_BASE - gic_redists_size;
+ gic_msi_base = gic_redists_base - gic_msi_size;
break;
default:
return -ENODEV;
@@ -184,30 +281,42 @@
return ret;
}
+ irq__routing_init(kvm);
+
if (!ioctl(gic_fd, KVM_HAS_DEVICE_ATTR, &vgic_init_attr)) {
ret = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &vgic_init_attr);
if (ret)
return ret;
}
+ kvm->msix_needs_devid = kvm__supports_vm_extension(kvm,
+ KVM_CAP_MSI_DEVID);
+
return 0;
}
late_init(gic__init_gic)
-void gic__generate_fdt_nodes(void *fdt, u32 phandle, enum irqchip_type type)
+void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type)
{
- const char *compatible;
+ const char *compatible, *msi_compatible = NULL;
+ u64 msi_prop[2];
u64 reg_prop[] = {
cpu_to_fdt64(ARM_GIC_DIST_BASE), cpu_to_fdt64(ARM_GIC_DIST_SIZE),
0, 0, /* to be filled */
};
switch (type) {
+ case IRQCHIP_GICV2M:
+ msi_compatible = "arm,gic-v2m-frame";
+ /* fall-through */
case IRQCHIP_GICV2:
compatible = "arm,cortex-a15-gic";
reg_prop[2] = cpu_to_fdt64(ARM_GIC_CPUI_BASE);
reg_prop[3] = cpu_to_fdt64(ARM_GIC_CPUI_SIZE);
break;
+ case IRQCHIP_GICV3_ITS:
+ msi_compatible = "arm,gic-v3-its";
+ /* fall-through */
case IRQCHIP_GICV3:
compatible = "arm,gic-v3";
reg_prop[2] = cpu_to_fdt64(gic_redists_base);
@@ -222,7 +331,23 @@
_FDT(fdt_property_cell(fdt, "#interrupt-cells", GIC_FDT_IRQ_NUM_CELLS));
_FDT(fdt_property(fdt, "interrupt-controller", NULL, 0));
_FDT(fdt_property(fdt, "reg", reg_prop, sizeof(reg_prop)));
- _FDT(fdt_property_cell(fdt, "phandle", phandle));
+ _FDT(fdt_property_cell(fdt, "phandle", PHANDLE_GIC));
+ _FDT(fdt_property_cell(fdt, "#address-cells", 2));
+ _FDT(fdt_property_cell(fdt, "#size-cells", 2));
+
+ if (msi_compatible) {
+ _FDT(fdt_property(fdt, "ranges", NULL, 0));
+
+ _FDT(fdt_begin_node(fdt, "msic"));
+ _FDT(fdt_property_string(fdt, "compatible", msi_compatible));
+ _FDT(fdt_property(fdt, "msi-controller", NULL, 0));
+ _FDT(fdt_property_cell(fdt, "phandle", PHANDLE_MSI));
+ msi_prop[0] = cpu_to_fdt64(gic_msi_base);
+ msi_prop[1] = cpu_to_fdt64(gic_msi_size);
+ _FDT(fdt_property(fdt, "reg", msi_prop, sizeof(msi_prop)));
+ _FDT(fdt_end_node(fdt));
+ }
+
_FDT(fdt_end_node(fdt));
}
diff --git a/arm/gicv2m.c b/arm/gicv2m.c
new file mode 100644
index 0000000..d7e6398
--- /dev/null
+++ b/arm/gicv2m.c
@@ -0,0 +1,153 @@
+#include <errno.h>
+#include <stdlib.h>
+
+#include "kvm/irq.h"
+#include "kvm/kvm.h"
+#include "kvm/util.h"
+
+#include "arm-common/gic.h"
+
+#define GICV2M_MSI_TYPER 0x008
+#define GICV2M_MSI_SETSPI 0x040
+#define GICV2M_MSI_IIDR 0xfcc
+
+#define GICV2M_SPI_MASK 0x3ff
+#define GICV2M_MSI_TYPER_VAL(start, nr) \
+ (((start) & GICV2M_SPI_MASK) << 16 | ((nr) & GICV2M_SPI_MASK))
+
+/* State of the single emulated GICv2m MSI frame. */
+struct gicv2m_chip {
+ /* First interrupt number handed out to the frame (from irq__alloc_line()) */
+ int first_spi;
+ /* Number of consecutive interrupt numbers covered by the frame */
+ int num_spis;
+ /* Per-SPI slot holding the GSI of the MSI bound to it, or -1 if unbound */
+ int *spis;
+ /* Base address of the frame as passed to gic__create_gicv2m_frame() */
+ u64 base;
+ /* Size of the frame (KVM_VGIC_V2M_SIZE) */
+ u64 size;
+};
+
+/* The one and only frame instance; filled in by gic__create_gicv2m_frame(). */
+static struct gicv2m_chip v2m;
+
+/*
+ * MSI routes are established lazily: the guest programs the MSI tables and
+ * puts the SPI it wants to use for a vector into the message data field.
+ * The IRQ code calls this hook whenever an MSI route changes, and we record
+ * the MSI->SPI association for later translation.
+ *
+ * The data field carries a GIC interrupt ID, which also numbers SGIs (0-15)
+ * and PPIs (16-31) ahead of the SPIs (32-1019). The value remembered here is
+ * the GSI of the MSI, i.e. the logical ID KVM uses for routing. For an SPI
+ * KVM implicitly defines the GSI as its pin number (SPI index), whereas the
+ * GSI of an MSI is allocated by kvmtool.
+ *
+ * Returns 0 on success (including the "no address programmed yet" case) and
+ * -EINVAL for non-MSI entries or an SPI outside the frame's range.
+ */
+static int gicv2m_update_routing(struct kvm *kvm,
+				 struct kvm_irq_routing_entry *entry)
+{
+	int spi_id;
+
+	if (entry->type != KVM_IRQ_ROUTING_MSI)
+		return -EINVAL;
+
+	/* Both address halves zero: nothing to record. */
+	if (!(entry->u.msi.address_hi || entry->u.msi.address_lo))
+		return 0;
+
+	spi_id = entry->u.msi.data & GICV2M_SPI_MASK;
+	if (spi_id >= v2m.first_spi && spi_id < v2m.first_spi + v2m.num_spis) {
+		v2m.spis[spi_id - v2m.first_spi] = entry->gsi;
+		return 0;
+	}
+
+	pr_err("invalid SPI number %d", spi_id);
+	return -EINVAL;
+}
+
+/*
+ * Look up the SPI slot that holds the given MSI GSI and return the GSI of
+ * that SPI (its interrupt number minus KVM_IRQ_OFFSET). A GSI that is not
+ * recorded in any slot is not an MSI and is returned unchanged.
+ */
+static int gicv2m_translate_gsi(struct kvm *kvm, u32 gsi)
+{
+	int slot = 0;
+
+	while (slot < v2m.num_spis) {
+		if (v2m.spis[slot] == (int)gsi)
+			return slot + v2m.first_spi - KVM_IRQ_OFFSET;
+		slot++;
+	}
+
+	/* Not an MSI */
+	return gsi;
+}
+
+/* The GICv2m backend always reports that MSIs can be signalled directly. */
+static bool gicv2m_can_signal_msi(struct kvm *kvm)
+{
+ return true;
+}
+
+/*
+ * Virtual devices may fire an MSI by hand instead of going through an MSI
+ * route (the equivalent of writing MSI_SETSPI directly). In that case the
+ * SPI named in the message data is triggered right away.
+ *
+ * Returns 0 on success, -EINVAL if the SPI lies outside the frame's range.
+ */
+static int gicv2m_signal_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+	int spi_id = msi->data & GICV2M_SPI_MASK;
+	int spi_end = v2m.first_spi + v2m.num_spis;
+
+	if (spi_id >= v2m.first_spi && spi_id < spi_end) {
+		kvm__irq_trigger(kvm, spi_id);
+		return 0;
+	}
+
+	pr_err("invalid SPI number %d", spi_id);
+	return -EINVAL;
+}
+
+/*
+ * MSI routing callbacks of the GICv2m backend; installed into the global
+ * msi_routing_ops pointer by gic__create_gicv2m_frame().
+ */
+static struct msi_routing_ops gicv2m_routing = {
+ .update_route = gicv2m_update_routing,
+ .translate_gsi = gicv2m_translate_gsi,
+ .can_signal_msi = gicv2m_can_signal_msi,
+ .signal_msi = gicv2m_signal_msi,
+};
+
+/*
+ * MMIO handler for the emulated GICv2m frame.
+ *
+ * Writes are ignored. For reads, addr is made relative to v2m.base and
+ * only two registers are answered: MSI_TYPER reports first_spi/num_spis
+ * packed by GICV2M_MSI_TYPER_VAL(), and MSI_IIDR reads as 0. Reads of any
+ * other offset leave the data buffer untouched.
+ *
+ * NOTE(review): the result is stored as a 32-bit value regardless of len
+ * and without byte-order conversion -- confirm that the frame is only
+ * ever accessed with 4-byte reads.
+ */
+static void gicv2m_mmio_callback(struct kvm_cpu *vcpu, u64 addr, u8 *data,
+ u32 len, u8 is_write, void *ptr)
+{
+ if (is_write)
+ return;
+
+ /* Turn the absolute address into an offset within the frame */
+ addr -= v2m.base;
+
+ switch (addr) {
+ case GICV2M_MSI_TYPER:
+ *(u32 *)data = GICV2M_MSI_TYPER_VAL(v2m.first_spi,
+ v2m.num_spis);
+ break;
+ case GICV2M_MSI_IIDR:
+ *(u32 *)data = 0x0;
+ break;
+ }
+}
+
+/*
+ * Set up the emulated GICv2m MSI frame at address base.
+ *
+ * Reserves v2m.num_spis interrupt lines via irq__alloc_line() (the first
+ * one becomes v2m.first_spi), marks every SPI slot as unbound (-1),
+ * registers the MMIO handler for the frame and, once that succeeded,
+ * installs the GICv2m MSI routing callbacks.
+ *
+ * Returns 0 on success, -ENOMEM if the slot table cannot be allocated, or
+ * the error from kvm__register_mmio(). If MMIO registration fails the
+ * slot table is freed again and msi_routing_ops is left untouched, so no
+ * callbacks referring to a non-existent frame remain installed.
+ */
+int gic__create_gicv2m_frame(struct kvm *kvm, u64 base)
+{
+	int i, ret;
+	int irq = irq__alloc_line();
+
+	v2m = (struct gicv2m_chip) {
+		.first_spi	= irq,		/* Includes GIC_SPI_IRQ_BASE */
+		.num_spis	= 64,		/* arbitrary */
+		.base		= base,
+		.size		= KVM_VGIC_V2M_SIZE,
+	};
+
+	v2m.spis = calloc(v2m.num_spis, sizeof(*v2m.spis));
+	if (!v2m.spis)
+		return -ENOMEM;
+
+	/* The first line was allocated above; reserve the remaining ones. */
+	v2m.spis[0] = -1;
+	for (i = 1; i < v2m.num_spis; i++) {
+		irq__alloc_line();
+		v2m.spis[i] = -1;
+	}
+
+	ret = kvm__register_mmio(kvm, base, KVM_VGIC_V2M_SIZE, false,
+				 gicv2m_mmio_callback, kvm);
+	if (ret) {
+		/* Undo the allocation so a failed setup leaves no stale state. */
+		free(v2m.spis);
+		v2m.spis = NULL;
+		v2m.num_spis = 0;
+		return ret;
+	}
+
+	/* Only advertise the routing callbacks once the frame really exists. */
+	msi_routing_ops = &gicv2m_routing;
+
+	return 0;
+}
diff --git a/arm/include/arm-common/fdt-arch.h b/arm/include/arm-common/fdt-arch.h
new file mode 100644
index 0000000..60c2d40
--- /dev/null
+++ b/arm/include/arm-common/fdt-arch.h
@@ -0,0 +1,6 @@
+#ifndef ARM__FDT_H
+#define ARM__FDT_H
+
+/*
+ * Fixed device tree phandle values shared by the ARM FDT generators.
+ * PHANDLE_RESERVED occupies 0, PHANDLE_GIC identifies the interrupt
+ * controller node, PHANDLE_MSI the MSI controller node, and PHANDLES_MAX
+ * is one past the last assigned value.
+ */
+enum phandles {PHANDLE_RESERVED = 0, PHANDLE_GIC, PHANDLE_MSI, PHANDLES_MAX};
+
+#endif /* ARM__FDT_H */
diff --git a/arm/include/arm-common/gic.h b/arm/include/arm-common/gic.h
index 4fde5ac..687effc 100644
--- a/arm/include/arm-common/gic.h
+++ b/arm/include/arm-common/gic.h
@@ -23,13 +23,16 @@
enum irqchip_type {
IRQCHIP_GICV2,
+ IRQCHIP_GICV2M,
IRQCHIP_GICV3,
+ IRQCHIP_GICV3_ITS,
};
struct kvm;
int gic__alloc_irqnum(void);
int gic__create(struct kvm *kvm, enum irqchip_type type);
-void gic__generate_fdt_nodes(void *fdt, u32 phandle, enum irqchip_type type);
+int gic__create_gicv2m_frame(struct kvm *kvm, u64 msi_frame_addr);
+void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type);
#endif /* ARM_COMMON__GIC_H */
diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
index 0f5fb7f..c83c45f 100644
--- a/arm/include/arm-common/kvm-arch.h
+++ b/arm/include/arm-common/kvm-arch.h
@@ -18,6 +18,8 @@
#define ARM_GIC_DIST_BASE (ARM_AXI_AREA - ARM_GIC_DIST_SIZE)
#define ARM_GIC_CPUI_BASE (ARM_GIC_DIST_BASE - ARM_GIC_CPUI_SIZE)
#define ARM_GIC_SIZE (ARM_GIC_DIST_SIZE + ARM_GIC_CPUI_SIZE)
+#define ARM_GIC_DIST_SIZE 0x10000
+#define ARM_GIC_CPUI_SIZE 0x20000
#define ARM_IOPORT_SIZE (ARM_MMIO_AREA - ARM_IOPORT_AREA)
#define ARM_VIRTIO_MMIO_SIZE (ARM_AXI_AREA - (ARM_MMIO_AREA + ARM_GIC_SIZE))
diff --git a/arm/include/arm-common/kvm-config-arch.h b/arm/include/arm-common/kvm-config-arch.h
index ed626b5..6a196f1 100644
--- a/arm/include/arm-common/kvm-config-arch.h
+++ b/arm/include/arm-common/kvm-config-arch.h
@@ -9,6 +9,7 @@
bool virtio_trans_pci;
bool aarch32_guest;
bool has_pmuv3;
+ u64 kaslr_seed;
enum irqchip_type irqchip;
};
@@ -27,7 +28,7 @@
"Force virtio devices to use PCI as their default " \
"transport"), \
OPT_CALLBACK('\0', "irqchip", &(cfg)->irqchip, \
- "[gicv2|gicv3]", \
+ "[gicv2|gicv2m|gicv3|gicv3-its]", \
"Type of interrupt controller to emulate in the guest", \
irqchip_parser, NULL),
diff --git a/arm/include/arm-common/kvm-cpu-arch.h b/arm/include/arm-common/kvm-cpu-arch.h
index 8a6a6e7..923d2c4 100644
--- a/arm/include/arm-common/kvm-cpu-arch.h
+++ b/arm/include/arm-common/kvm-cpu-arch.h
@@ -25,8 +25,7 @@
struct kvm_coalesced_mmio_ring *ring;
- void (*generate_fdt_nodes)(void *fdt, struct kvm* kvm,
- u32 gic_phandle);
+ void (*generate_fdt_nodes)(void *fdt, struct kvm* kvm);
};
struct kvm_arm_target {
diff --git a/arm/include/arm-common/pci.h b/arm/include/arm-common/pci.h
index ee87725..9008a0e 100644
--- a/arm/include/arm-common/pci.h
+++ b/arm/include/arm-common/pci.h
@@ -1,6 +1,6 @@
#ifndef ARM_COMMON__PCI_H
#define ARM_COMMON__PCI_H
-void pci__generate_fdt_nodes(void *fdt, u32 gic_phandle);
+void pci__generate_fdt_nodes(void *fdt);
#endif /* ARM_COMMON__PCI_H */
diff --git a/arm/irq.c b/arm/irq.c
deleted file mode 100644
index d8f44df..0000000
--- a/arm/irq.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include "kvm/irq.h"
-#include "kvm/kvm.h"
-#include "kvm/util.h"
-
-int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg)
-{
- die(__FUNCTION__);
- return 0;
-}
diff --git a/arm/kvm.c b/arm/kvm.c
index 3cfa90a..2ab436e 100644
--- a/arm/kvm.c
+++ b/arm/kvm.c
@@ -49,10 +49,8 @@
void kvm__arch_read_term(struct kvm *kvm)
{
- if (term_readable(0)) {
- serial8250__update_consoles(kvm);
- virtio_console__inject_interrupt(kvm);
- }
+ serial8250__update_consoles(kvm);
+ virtio_console__inject_interrupt(kvm);
}
void kvm__arch_set_cmdline(char *cmdline, bool video)
diff --git a/arm/pci.c b/arm/pci.c
index 99a8130..813df26 100644
--- a/arm/pci.c
+++ b/arm/pci.c
@@ -18,10 +18,12 @@
struct of_interrupt_map_entry {
struct of_pci_irq_mask pci_irq_mask;
u32 gic_phandle;
+ u32 gic_addr_hi;
+ u32 gic_addr_lo;
struct of_gic_irq gic_irq;
} __attribute__((packed));
-void pci__generate_fdt_nodes(void *fdt, u32 gic_phandle)
+void pci__generate_fdt_nodes(void *fdt)
{
struct device_header *dev_hdr;
struct of_interrupt_map_entry irq_map[OF_PCI_IRQ_MAP_MAX];
@@ -60,10 +62,12 @@
_FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
_FDT(fdt_property_cell(fdt, "#interrupt-cells", 0x1));
_FDT(fdt_property_string(fdt, "compatible", "pci-host-cam-generic"));
+ _FDT(fdt_property(fdt, "dma-coherent", NULL, 0));
_FDT(fdt_property(fdt, "bus-range", bus_range, sizeof(bus_range)));
_FDT(fdt_property(fdt, "reg", &cfg_reg_prop, sizeof(cfg_reg_prop)));
_FDT(fdt_property(fdt, "ranges", ranges, sizeof(ranges)));
+ _FDT(fdt_property_cell(fdt, "msi-parent", PHANDLE_MSI));
/* Generate the interrupt map ... */
dev_hdr = device__first_dev(DEVICE_BUS_PCI);
@@ -83,7 +87,9 @@
},
.pci_pin = cpu_to_fdt32(pin),
},
- .gic_phandle = cpu_to_fdt32(gic_phandle),
+ .gic_phandle = cpu_to_fdt32(PHANDLE_GIC),
+ .gic_addr_hi = 0,
+ .gic_addr_lo = 0,
.gic_irq = {
.type = cpu_to_fdt32(GIC_FDT_IRQ_TYPE_SPI),
.num = cpu_to_fdt32(irq - GIC_SPI_IRQ_BASE),
diff --git a/builtin-run.c b/builtin-run.c
index 4eff4ac..999b0f8 100644
--- a/builtin-run.c
+++ b/builtin-run.c
@@ -455,6 +455,7 @@
static char real_cmdline[2048], default_name[20];
unsigned int nr_online_cpus;
struct kvm *kvm = kvm__new();
+ bool video;
if (IS_ERR(kvm))
return kvm;
@@ -537,6 +538,8 @@
if (!kvm->cfg.console)
kvm->cfg.console = DEFAULT_CONSOLE;
+ video = kvm->cfg.vnc || kvm->cfg.sdl || kvm->cfg.gtk;
+
if (!strncmp(kvm->cfg.console, "virtio", 6))
kvm->cfg.active_console = CONSOLE_VIRTIO;
else if (!strncmp(kvm->cfg.console, "serial", 6))
@@ -565,7 +568,22 @@
kvm->cfg.network = DEFAULT_NETWORK;
memset(real_cmdline, 0, sizeof(real_cmdline));
- kvm__arch_set_cmdline(real_cmdline, kvm->cfg.vnc || kvm->cfg.sdl || kvm->cfg.gtk);
+ kvm__arch_set_cmdline(real_cmdline, video);
+
+ if (video) {
+ strcat(real_cmdline, " console=tty0");
+ } else {
+ switch (kvm->cfg.active_console) {
+ case CONSOLE_HV:
+ /* Fallthrough */
+ case CONSOLE_VIRTIO:
+ strcat(real_cmdline, " console=hvc0");
+ break;
+ case CONSOLE_8250:
+ strcat(real_cmdline, " console=ttyS0");
+ break;
+ }
+ }
if (!kvm->cfg.guest_name) {
if (kvm->cfg.custom_rootfs) {
diff --git a/builtin-setup.c b/builtin-setup.c
index 8be8d62..b24d2a1 100644
--- a/builtin-setup.c
+++ b/builtin-setup.c
@@ -123,7 +123,7 @@
#ifdef CONFIG_GUEST_INIT
static int extract_file(const char *guestfs_name, const char *filename,
- const void *data, const void *_size)
+ const void *data, size_t size)
{
char path[PATH_MAX];
int fd, ret;
@@ -138,7 +138,7 @@
die("Fail to setup %s", path);
}
- ret = xwrite(fd, data, (size_t)_size);
+ ret = xwrite(fd, data, size);
if (ret < 0)
die("Fail to setup %s", path);
close(fd);
@@ -146,10 +146,10 @@
return 0;
}
-extern char _binary_guest_init_start;
-extern char _binary_guest_init_size;
-extern char _binary_guest_pre_init_start;
-extern char _binary_guest_pre_init_size;
+extern unsigned char init_binary[];
+extern unsigned long init_binary_size;
+extern unsigned char pre_init_binary[];
+extern unsigned long pre_init_binary_size;
int kvm_setup_guest_init(const char *guestfs_name)
{
@@ -157,14 +157,12 @@
#ifdef CONFIG_GUEST_PRE_INIT
err = extract_file(guestfs_name, "virt/pre_init",
- &_binary_guest_pre_init_start,
- &_binary_guest_pre_init_size);
+ pre_init_binary, pre_init_binary_size);
if (err)
return err;
#endif
err = extract_file(guestfs_name, "virt/init",
- &_binary_guest_init_start,
- &_binary_guest_init_size);
+ init_binary, init_binary_size);
return err;
}
#else
diff --git a/config/utilities.mak b/config/utilities.mak
index 92af49b..612ed1f 100644
--- a/config/utilities.mak
+++ b/config/utilities.mak
@@ -194,3 +194,11 @@
echo "$(1)" | \
$(CC) -x c - $(2) $(3) -o "$$TMP" > /dev/null 2>&1 && echo y; \
rm -f "$$TMP"')
+
+# binary-to-C
+# create a C source file describing the binary input file as an array
+# Usage: $(call binary-to-C,binary-file,C-symbol-name,C-output-file)
+#
+# The output defines "unsigned long <symbol>_size" and
+# "unsigned char <symbol>[]". The steps are chained with && so that a
+# failing step stops the sequence, and the partially written output file
+# is removed on failure so it can never be mistaken for an up-to-date
+# target. Relies on GNU coreutils extensions (stat -c, od -w).
+binary-to-C = { stat -c "unsigned long $(2)_size = %s;" $(1) > $(3) && \
+	echo "unsigned char $(2)[] = {" >> $(3) && \
+	od -v -tx1 -An -w12 $(1) | sed -e "s/ \(..\)/0x\1, /g" >> $(3) && \
+	echo "};" >> $(3); } || { rm -f $(3); exit 1; }
diff --git a/hw/pci-shmem.c b/hw/pci-shmem.c
index 1cac90b..6e3d70f 100644
--- a/hw/pci-shmem.c
+++ b/hw/pci-shmem.c
@@ -159,7 +159,10 @@
return fd;
if (pci_shmem_pci_device.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_ENABLE)) {
- gsi = irq__add_msix_route(kvm, &msix_table[0].msg);
+ gsi = irq__add_msix_route(kvm, &msix_table[0].msg,
+ pci_shmem_device.dev_num << 3);
+ if (gsi < 0)
+ return gsi;
} else {
gsi = pci_shmem_pci_device.irq_line;
}
diff --git a/include/kvm/fdt.h b/include/kvm/fdt.h
index 53d85a4..beadc7f 100644
--- a/include/kvm/fdt.h
+++ b/include/kvm/fdt.h
@@ -7,6 +7,8 @@
#include <linux/types.h>
+#include "kvm/fdt-arch.h"
+
#define FDT_MAX_SIZE 0x10000
/* Those definitions are generic FDT values for specifying IRQ
@@ -33,10 +35,4 @@
} \
} while (0)
-static inline u32 fdt__alloc_phandle(void)
-{
- static u32 phandle = 0;
- return ++phandle;
-}
-
#endif /* KVM__FDT_H */
diff --git a/include/kvm/irq.h b/include/kvm/irq.h
index 8a78e43..8ba8b74 100644
--- a/include/kvm/irq.h
+++ b/include/kvm/irq.h
@@ -1,6 +1,7 @@
#ifndef KVM__IRQ_H
#define KVM__IRQ_H
+#include <stdbool.h>
#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/list.h>
@@ -10,11 +11,28 @@
struct kvm;
+/*
+ * Callbacks used by the generic IRQ code to program MSI routes and to
+ * inject MSIs. The active set is reached through the msi_routing_ops
+ * pointer declared below.
+ */
+struct msi_routing_ops {
+ /* Invoked with the routing entry that was just added or modified. */
+ int (*update_route)(struct kvm *kvm, struct kvm_irq_routing_entry *);
+ /* Backs irq__can_signal_msi(). */
+ bool (*can_signal_msi)(struct kvm *kvm);
+ /* Backs irq__signal_msi(). */
+ int (*signal_msi)(struct kvm *kvm, struct kvm_msi *msi);
+ /* NOTE(review): no user of this hook is visible in this patch chunk. */
+ int (*translate_gsi)(struct kvm *kvm, u32 gsi);
+};
+
+extern struct msi_routing_ops *msi_routing_ops;
+extern struct kvm_irq_routing *irq_routing;
+extern int next_gsi;
+
int irq__alloc_line(void);
int irq__get_nr_allocated_lines(void);
int irq__init(struct kvm *kvm);
int irq__exit(struct kvm *kvm);
-int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg);
+
+int irq__allocate_routing_entry(void);
+int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg, u32 device_id);
+void irq__update_msix_route(struct kvm *kvm, u32 gsi, struct msi_msg *msg);
+
+bool irq__can_signal_msi(struct kvm *kvm);
+int irq__signal_msi(struct kvm *kvm, struct kvm_msi *msi);
#endif
diff --git a/include/kvm/kvm.h b/include/kvm/kvm.h
index 4a76ec2..90463b8 100644
--- a/include/kvm/kvm.h
+++ b/include/kvm/kvm.h
@@ -63,6 +63,7 @@
struct list_head mem_banks;
bool nmi_disabled;
+ bool msix_needs_devid;
const char *vmlinux;
struct disk_image **disks;
@@ -129,6 +130,7 @@
}
bool kvm__supports_extension(struct kvm *kvm, unsigned int extension);
+bool kvm__supports_vm_extension(struct kvm *kvm, unsigned int extension);
static inline void kvm__set_thread_name(const char *name)
{
diff --git a/include/kvm/virtio-9p.h b/include/kvm/virtio-9p.h
index 19ffe50..3ea7698 100644
--- a/include/kvm/virtio-9p.h
+++ b/include/kvm/virtio-9p.h
@@ -5,7 +5,6 @@
#include "kvm/threadpool.h"
#include "kvm/parse-options.h"
-#include <sys/types.h>
#include <dirent.h>
#include <linux/list.h>
#include <linux/rbtree.h>
diff --git a/include/kvm/virtio.h b/include/kvm/virtio.h
index 768ee96..00a791a 100644
--- a/include/kvm/virtio.h
+++ b/include/kvm/virtio.h
@@ -17,10 +17,15 @@
#define VIRTIO_PCI_O_CONFIG 0
#define VIRTIO_PCI_O_MSIX 1
-#define VIRTIO_ENDIAN_HOST 0
#define VIRTIO_ENDIAN_LE (1 << 0)
#define VIRTIO_ENDIAN_BE (1 << 1)
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define VIRTIO_ENDIAN_HOST VIRTIO_ENDIAN_LE
+#else
+#define VIRTIO_ENDIAN_HOST VIRTIO_ENDIAN_BE
+#endif
+
struct virt_queue {
struct vring vring;
u32 pfn;
@@ -40,7 +45,7 @@
#define VIRTIO_RING_ENDIAN VIRTIO_ENDIAN_HOST
#endif
-#if (VIRTIO_RING_ENDIAN & (VIRTIO_ENDIAN_LE | VIRTIO_ENDIAN_BE))
+#if VIRTIO_RING_ENDIAN != VIRTIO_ENDIAN_HOST
static inline __u16 __virtio_g2h_u16(u16 endian, __u16 val)
{
@@ -112,6 +117,8 @@
return virtio_guest_to_host_u16(vq, vq->vring.avail->idx) != vq->last_avail_idx;
}
+void virt_queue__used_idx_advance(struct virt_queue *queue, u16 jump);
+struct vring_used_elem * virt_queue__set_used_elem_no_update(struct virt_queue *queue, u32 head, u32 len, u16 offset);
struct vring_used_elem *virt_queue__set_used_elem(struct virt_queue *queue, u32 head, u32 len);
bool virtio_queue__should_signal(struct virt_queue *vq);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a7f1f80..f51d508 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -218,7 +218,8 @@
struct kvm_run {
/* in */
__u8 request_interrupt_window;
- __u8 padding1[7];
+ __u8 immediate_exit;
+ __u8 padding1[6];
/* out */
__u32 exit_reason;
@@ -651,6 +652,9 @@
};
/* for KVM_PPC_GET_PVINFO */
+
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
+
struct kvm_ppc_pvinfo {
/* out */
__u32 flags;
@@ -682,7 +686,12 @@
struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
};
-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
+/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
+struct kvm_ppc_resize_hpt {
+ __u64 flags;
+ __u32 shift;
+ __u32 pad;
+};
#define KVMIO 0xAE
@@ -865,6 +874,15 @@
#define KVM_CAP_SPAPR_TCE_64 125
#define KVM_CAP_ARM_PMU_V3 126
#define KVM_CAP_VCPU_ATTRIBUTES 127
+#define KVM_CAP_MAX_VCPU_ID 128
+#define KVM_CAP_X2APIC_API 129
+#define KVM_CAP_S390_USER_INSTR0 130
+#define KVM_CAP_MSI_DEVID 131
+#define KVM_CAP_PPC_HTM 132
+#define KVM_CAP_SPAPR_RESIZE_HPT 133
+#define KVM_CAP_PPC_MMU_RADIX 134
+#define KVM_CAP_PPC_MMU_HASH_V3 135
+#define KVM_CAP_IMMEDIATE_EXIT 136
#ifdef KVM_CAP_IRQ_ROUTING
@@ -877,7 +895,10 @@
__u32 address_lo;
__u32 address_hi;
__u32 data;
- __u32 pad;
+ union {
+ __u32 pad;
+ __u32 devid;
+ };
};
struct kvm_irq_routing_s390_adapter {
@@ -964,12 +985,19 @@
__u8 pad[16];
};
+/* For KVM_CAP_ADJUST_CLOCK */
+
+/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */
+#define KVM_CLOCK_TSC_STABLE 2
+
struct kvm_clock_data {
__u64 clock;
__u32 flags;
__u32 pad[9];
};
+/* For KVM_CAP_SW_TLB */
+
#define KVM_MMU_FSL_BOOKE_NOHV 0
#define KVM_MMU_FSL_BOOKE_HV 1
@@ -1023,12 +1051,14 @@
__u64 addr;
};
+#define KVM_MSI_VALID_DEVID (1U << 0)
struct kvm_msi {
__u32 address_lo;
__u32 address_hi;
__u32 data;
__u32 flags;
- __u8 pad[16];
+ __u32 devid;
+ __u8 pad[12];
};
struct kvm_arm_device_addr {
@@ -1073,6 +1103,8 @@
#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC
KVM_DEV_TYPE_ARM_VGIC_V3,
#define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3
+ KVM_DEV_TYPE_ARM_VGIC_ITS,
+#define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS
KVM_DEV_TYPE_MAX,
};
@@ -1167,6 +1199,13 @@
#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr)
/* Available with KVM_CAP_PPC_RTAS */
#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args)
+/* Available with KVM_CAP_SPAPR_RESIZE_HPT */
+#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt)
+#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt)
+/* Available with KVM_CAP_PPC_MMU_RADIX or KVM_CAP_PPC_MMU_HASH_V3 */
+#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
+/* Available with KVM_CAP_PPC_MMU_RADIX */
+#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
@@ -1312,4 +1351,7 @@
__u16 padding[3];
};
+#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+
#endif /* __LINUX_KVM_H */
diff --git a/irq.c b/irq.c
index 71eaa05..c89604c 100644
--- a/irq.c
+++ b/irq.c
@@ -1,7 +1,22 @@
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/kvm.h>
+#include <errno.h>
+
+#include "kvm/kvm.h"
#include "kvm/irq.h"
#include "kvm/kvm-arch.h"
static u8 next_line = KVM_IRQ_OFFSET;
+static int allocated_gsis = 0;
+
+int next_gsi;
+
+struct msi_routing_ops irq__default_routing_ops;
+struct msi_routing_ops *msi_routing_ops = &irq__default_routing_ops;
+
+struct kvm_irq_routing *irq_routing = NULL;
int irq__alloc_line(void)
{
@@ -12,3 +27,152 @@
{
return next_line - KVM_IRQ_OFFSET;
}
+
+/*
+ * Make sure irq_routing has room for at least one more entry.
+ *
+ * When the table is full (or does not exist yet) its capacity is raised to
+ * the next multiple of 32 entries above the current count. The newly added
+ * space is zeroed; existing entries and irq_routing->nr are preserved.
+ *
+ * Returns 0 on success (including when no growth was needed) or -ENOMEM if
+ * the table could not be enlarged, in which case the previous table and its
+ * recorded capacity are left untouched.
+ */
+int irq__allocate_routing_entry(void)
+{
+	size_t table_size = sizeof(struct kvm_irq_routing);
+	size_t old_size = table_size;
+	struct kvm_irq_routing *new_routing;
+	int nr_entries = 0;
+	int new_gsis;
+
+	if (irq_routing)
+		nr_entries = irq_routing->nr;
+
+	/* There is still a free slot in the current allocation. */
+	if (nr_entries < allocated_gsis)
+		return 0;
+
+	old_size += sizeof(struct kvm_irq_routing_entry) * allocated_gsis;
+	new_gsis = ALIGN(nr_entries + 1, 32);
+	table_size += sizeof(struct kvm_irq_routing_entry) * new_gsis;
+
+	/*
+	 * Go through a temporary: assigning realloc()'s result straight to
+	 * irq_routing would drop the only reference to the live table on
+	 * failure, and raising allocated_gsis beforehand would make later
+	 * calls believe the table had already grown.
+	 */
+	new_routing = realloc(irq_routing, table_size);
+	if (new_routing == NULL)
+		return -ENOMEM;
+
+	/* Zero only the freshly added tail; existing entries stay intact. */
+	memset((void *)new_routing + old_size, 0, table_size - old_size);
+
+	irq_routing = new_routing;
+	allocated_gsis = new_gsis;
+	irq_routing->nr = nr_entries;
+	irq_routing->flags = 0;
+
+	return 0;
+}
+
+/*
+ * Report whether KVM_CAP_IRQ_ROUTING is available.
+ *
+ * The capability is probed once through kvm__supports_extension() and the
+ * answer is cached in a function-local static:
+ *   0 = not probed yet, 1 = supported, -1 = not supported.
+ */
+static bool check_for_irq_routing(struct kvm *kvm)
+{
+ static int has_irq_routing = 0;
+
+ if (has_irq_routing == 0) {
+ if (kvm__supports_extension(kvm, KVM_CAP_IRQ_ROUTING))
+ has_irq_routing = 1;
+ else
+ has_irq_routing = -1;
+ }
+
+ return has_irq_routing > 0;
+}
+
+/*
+ * Default update_route hook: hand the complete irq_routing table to KVM
+ * with KVM_SET_GSI_ROUTING. The individual @entry is not used here.
+ * Returns the ioctl() result.
+ */
+static int irq__update_msix_routes(struct kvm *kvm,
+ struct kvm_irq_routing_entry *entry)
+{
+ return ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, irq_routing);
+}
+
+/* Default can_signal_msi hook: true when KVM reports KVM_CAP_SIGNAL_MSI. */
+static bool irq__default_can_signal_msi(struct kvm *kvm)
+{
+ return kvm__supports_extension(kvm, KVM_CAP_SIGNAL_MSI);
+}
+
+/*
+ * Default signal_msi hook: issue KVM_SIGNAL_MSI on the VM file descriptor
+ * with @msi and return the ioctl() result.
+ */
+static int irq__default_signal_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+ return ioctl(kvm->vm_fd, KVM_SIGNAL_MSI, msi);
+}
+
+/*
+ * Default MSI hooks, used unless msi_routing_ops is pointed elsewhere.
+ * translate_gsi is not set here and is therefore NULL.
+ */
+struct msi_routing_ops irq__default_routing_ops = {
+ .update_route = irq__update_msix_routes,
+ .signal_msi = irq__default_signal_msi,
+ .can_signal_msi = irq__default_can_signal_msi,
+};
+
+/* Ask the currently installed msi_routing_ops whether MSIs can be injected directly. */
+bool irq__can_signal_msi(struct kvm *kvm)
+{
+ return msi_routing_ops->can_signal_msi(kvm);
+}
+
+/* Inject @msi through the currently installed msi_routing_ops; returns the hook's result. */
+int irq__signal_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+ return msi_routing_ops->signal_msi(kvm, msi);
+}
+
+/*
+ * Append an MSI routing entry for @msg and push the updated table through
+ * the installed update_route hook.
+ *
+ * @device_id is only recorded in the entry (together with the
+ * KVM_MSI_VALID_DEVID flag) when kvm->msix_needs_devid is set.
+ *
+ * Returns the GSI number assigned to the new route on success, -ENXIO when
+ * KVM_CAP_IRQ_ROUTING is not available, or the non-zero value reported by
+ * irq__allocate_routing_entry() or the update_route hook on failure.
+ */
+int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg, u32 device_id)
+{
+	int r;
+	struct kvm_irq_routing_entry *entry;
+
+	if (!check_for_irq_routing(kvm))
+		return -ENXIO;
+
+	r = irq__allocate_routing_entry();
+	if (r)
+		return r;
+
+	entry = &irq_routing->entries[irq_routing->nr];
+	*entry = (struct kvm_irq_routing_entry) {
+		.gsi = next_gsi,
+		.type = KVM_IRQ_ROUTING_MSI,
+		.u.msi.address_hi = msg->address_hi,
+		.u.msi.address_lo = msg->address_lo,
+		.u.msi.data = msg->data,
+	};
+
+	if (kvm->msix_needs_devid) {
+		entry->flags = KVM_MSI_VALID_DEVID;
+		entry->u.msi.devid = device_id;
+	}
+
+	irq_routing->nr++;
+
+	r = msi_routing_ops->update_route(kvm, entry);
+	if (r) {
+		/*
+		 * Drop the entry again: next_gsi is not advanced on failure,
+		 * so leaving it in place would make the next call add a
+		 * second entry carrying the same GSI number.
+		 */
+		irq_routing->nr--;
+		return r;
+	}
+
+	return next_gsi++;
+}
+
+/*
+ * Store @newdata in *@ptr if it differs from the current value.
+ * Returns true when the stored value was changed, false otherwise.
+ */
+static bool update_data(u32 *ptr, u32 newdata)
+{
+ if (*ptr == newdata)
+ return false;
+
+ *ptr = newdata;
+ return true;
+}
+
+/*
+ * Refresh the MSI address/data of the routing entry whose GSI is @gsi.
+ *
+ * Nothing happens if no entry carries that GSI or if @msg matches what is
+ * already stored. Otherwise the update_route hook is invoked and a failure
+ * there terminates the program through die_perror().
+ *
+ * NOTE(review): irq_routing is dereferenced without a NULL check, so this
+ * relies on a routing table already existing — confirm against callers.
+ */
+void irq__update_msix_route(struct kvm *kvm, u32 gsi, struct msi_msg *msg)
+{
+ struct kvm_irq_routing_msi *entry;
+ unsigned int i;
+ bool changed;
+
+ /* Linear search for the entry that uses this GSI. */
+ for (i = 0; i < irq_routing->nr; i++)
+ if (gsi == irq_routing->entries[i].gsi)
+ break;
+ if (i == irq_routing->nr)
+ return;
+
+ entry = &irq_routing->entries[i].u.msi;
+
+ /* Update all three fields; |= keeps track of whether any of them changed. */
+ changed = update_data(&entry->address_hi, msg->address_hi);
+ changed |= update_data(&entry->address_lo, msg->address_lo);
+ changed |= update_data(&entry->data, msg->data);
+
+ if (!changed)
+ return;
+
+ if (msi_routing_ops->update_route(kvm, &irq_routing->entries[i]))
+ die_perror("KVM_SET_GSI_ROUTING");
+}
+
+/*
+ * Release the routing table. Declared weak, so a non-weak irq__exit()
+ * defined elsewhere takes precedence at link time. Always returns 0.
+ * NOTE(review): dev_base_exit() presumably registers this as a teardown
+ * hook — its definition is not visible here.
+ */
+int __attribute__((weak)) irq__exit(struct kvm *kvm)
+{
+ free(irq_routing);
+ return 0;
+}
+dev_base_exit(irq__exit);
diff --git a/kvm.c b/kvm.c
index 7fa76f7..f8f2fdc 100644
--- a/kvm.c
+++ b/kvm.c
@@ -93,6 +93,34 @@
return kvm_dir;
}
+/*
+ * Query @extension with KVM_CHECK_EXTENSION on the VM file descriptor.
+ *
+ * The first call checks KVM_CAP_CHECK_EXTENSION_VM on the system fd and
+ * caches the outcome in a function-local static (0 = not checked yet,
+ * 1 = available, -1 = unavailable). When it is unavailable every call
+ * returns false.
+ *
+ * Returns false if the per-VM ioctl fails; otherwise the ioctl's result
+ * converted to bool.
+ */
+bool kvm__supports_vm_extension(struct kvm *kvm, unsigned int extension)
+{
+ static int supports_vm_ext_check = 0;
+ int ret;
+
+ switch (supports_vm_ext_check) {
+ case 0:
+ ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION,
+ KVM_CAP_CHECK_EXTENSION_VM);
+ if (ret <= 0) {
+ supports_vm_ext_check = -1;
+ return false;
+ }
+ supports_vm_ext_check = 1;
+ /* fall through */
+ case 1:
+ break;
+ case -1:
+ return false;
+ }
+
+ ret = ioctl(kvm->vm_fd, KVM_CHECK_EXTENSION, extension);
+ if (ret < 0)
+ return false;
+
+ return ret;
+}
+
bool kvm__supports_extension(struct kvm *kvm, unsigned int extension)
{
int ret;
@@ -415,7 +443,7 @@
mutex_lock(&pause_lock);
/* Check if the guest is running */
- if (!kvm->cpus[0] || kvm->cpus[0]->thread == 0)
+ if (!kvm->cpus || !kvm->cpus[0] || kvm->cpus[0]->thread == 0)
return;
pause_event = eventfd(0, 0);
diff --git a/mips/include/asm/kvm.h b/mips/include/asm/kvm.h
index 6985eb5..a8a0199 100644
--- a/mips/include/asm/kvm.h
+++ b/mips/include/asm/kvm.h
@@ -19,6 +19,8 @@
* Some parts derived from the x86 version of this file.
*/
+#define __KVM_HAVE_READONLY_MEM
+
/*
* for KVM_GET_REGS and KVM_SET_REGS
*
diff --git a/mips/include/kvm/fdt-arch.h b/mips/include/kvm/fdt-arch.h
new file mode 100644
index 0000000..b030245
--- /dev/null
+++ b/mips/include/kvm/fdt-arch.h
@@ -0,0 +1,6 @@
+#ifndef KVM__KVM_FDT_H
+#define KVM__KVM_FDT_H
+
+enum phandles {PHANDLE_RESERVED = 0, PHANDLES_MAX};
+
+#endif /* KVM__KVM_FDT_H */
diff --git a/mips/irq.c b/mips/irq.c
deleted file mode 100644
index c1ff6bb..0000000
--- a/mips/irq.c
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "kvm/irq.h"
-#include "kvm/kvm.h"
-
-#include <stdlib.h>
-
-int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg)
-{
- pr_warning("irq__add_msix_route");
- return 1;
-}
diff --git a/powerpc/include/asm/kvm.h b/powerpc/include/asm/kvm.h
index c93cf35..4edbe4b 100644
--- a/powerpc/include/asm/kvm.h
+++ b/powerpc/include/asm/kvm.h
@@ -413,6 +413,26 @@
__u16 n_invalid;
};
+/* For KVM_PPC_CONFIGURE_V3_MMU */
+struct kvm_ppc_mmuv3_cfg {
+ __u64 flags;
+ __u64 process_table; /* second doubleword of partition table entry */
+};
+
+/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
+#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */
+#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */
+
+/* For KVM_PPC_GET_RMMU_INFO */
+struct kvm_ppc_rmmu_info {
+ struct kvm_ppc_radix_geom {
+ __u8 page_shift;
+ __u8 level_bits[4];
+ __u8 pad[3];
+ } geometries[8];
+ __u32 ap_encodings[8];
+};
+
/* Per-vcpu XICS interrupt controller state */
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
@@ -573,6 +593,10 @@
#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
+/* POWER9 registers */
+#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
+#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
@@ -596,6 +620,7 @@
#define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
#define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
#define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
+#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a)
/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
@@ -608,5 +633,7 @@
#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
#define KVM_XICS_MASKED (1ULL << 41)
#define KVM_XICS_PENDING (1ULL << 42)
+#define KVM_XICS_PRESENTED (1ULL << 43)
+#define KVM_XICS_QUEUED (1ULL << 44)
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/powerpc/include/kvm/fdt-arch.h b/powerpc/include/kvm/fdt-arch.h
new file mode 100644
index 0000000..d48c055
--- /dev/null
+++ b/powerpc/include/kvm/fdt-arch.h
@@ -0,0 +1,6 @@
+#ifndef KVM__KVM_FDT_H
+#define KVM__KVM_FDT_H
+
+enum phandles {PHANDLE_RESERVED = 0, PHANDLE_XICP, PHANDLES_MAX};
+
+#endif /* KVM__KVM_FDT_H */
diff --git a/powerpc/irq.c b/powerpc/irq.c
deleted file mode 100644
index 03f2fe7..0000000
--- a/powerpc/irq.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * PPC64 IRQ routines
- *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include "kvm/devices.h"
-#include "kvm/irq.h"
-#include "kvm/kvm.h"
-#include "kvm/util.h"
-
-#include <linux/types.h>
-#include <linux/rbtree.h>
-#include <linux/list.h>
-#include <linux/kvm.h>
-#include <sys/ioctl.h>
-
-#include <stddef.h>
-#include <stdlib.h>
-
-#include "kvm/pci.h"
-
-int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg)
-{
- die(__FUNCTION__);
- return 0;
-}
diff --git a/powerpc/kvm.c b/powerpc/kvm.c
index 3c1596d..c738c1d 100644
--- a/powerpc/kvm.c
+++ b/powerpc/kvm.c
@@ -40,8 +40,6 @@
#define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
-#define PHANDLE_XICP 0x00001111
-
static char kern_cmdline[2048];
struct kvm_ext kvm_req_ext[] = {
diff --git a/term.c b/term.c
index 58f66a0..b8a70fe 100644
--- a/term.c
+++ b/term.c
@@ -190,6 +190,7 @@
term = orig_term;
+ term.c_iflag &= ~(ICRNL);
term.c_lflag &= ~(ICANON | ECHO | ISIG);
tcsetattr(STDIN_FILENO, TCSANOW, &term);
diff --git a/virtio/9p.c b/virtio/9p.c
index 6acbfdd..69fdc4b 100644
--- a/virtio/9p.c
+++ b/virtio/9p.c
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include <fcntl.h>
#include <sys/stat.h>
+#include <sys/sysmacros.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
diff --git a/virtio/core.c b/virtio/core.c
index 3b6e4d7..d6ac289 100644
--- a/virtio/core.c
+++ b/virtio/core.c
@@ -21,22 +21,17 @@
return "unknown";
}
-struct vring_used_elem *virt_queue__set_used_elem(struct virt_queue *queue, u32 head, u32 len)
+void virt_queue__used_idx_advance(struct virt_queue *queue, u16 jump)
{
- struct vring_used_elem *used_elem;
u16 idx = virtio_guest_to_host_u16(queue, queue->vring.used->idx);
- used_elem = &queue->vring.used->ring[idx % queue->vring.num];
- used_elem->id = virtio_host_to_guest_u32(queue, head);
- used_elem->len = virtio_host_to_guest_u32(queue, len);
-
/*
* Use wmb to assure that used elem was updated with head and len.
* We need a wmb here since we can't advance idx unless we're ready
* to pass the used element to the guest.
*/
wmb();
- idx++;
+ idx += jump;
queue->vring.used->idx = virtio_host_to_guest_u16(queue, idx);
/*
@@ -45,6 +40,29 @@
* an updated idx.
*/
wmb();
+}
+
+struct vring_used_elem *
+virt_queue__set_used_elem_no_update(struct virt_queue *queue, u32 head,
+ u32 len, u16 offset)
+{
+ struct vring_used_elem *used_elem;
+ u16 idx = virtio_guest_to_host_u16(queue, queue->vring.used->idx);
+
+ idx += offset;
+ used_elem = &queue->vring.used->ring[idx % queue->vring.num];
+ used_elem->id = virtio_host_to_guest_u32(queue, head);
+ used_elem->len = virtio_host_to_guest_u32(queue, len);
+
+ return used_elem;
+}
+
+struct vring_used_elem *virt_queue__set_used_elem(struct virt_queue *queue, u32 head, u32 len)
+{
+ struct vring_used_elem *used_elem;
+
+ used_elem = virt_queue__set_used_elem_no_update(queue, head, len, 0);
+ virt_queue__used_idx_advance(queue, 1);
return used_elem;
}
diff --git a/virtio/mmio.c b/virtio/mmio.c
index 5174455..f0af4bd 100644
--- a/virtio/mmio.c
+++ b/virtio/mmio.c
@@ -252,6 +252,7 @@
_FDT(fdt_begin_node(fdt, dev_name));
_FDT(fdt_property_string(fdt, "compatible", "virtio,mmio"));
_FDT(fdt_property(fdt, "reg", reg_prop, sizeof(reg_prop)));
+ _FDT(fdt_property(fdt, "dma-coherent", NULL, 0));
generate_irq_prop(fdt, vmmio->irq, IRQ_TYPE_EDGE_RISING);
_FDT(fdt_end_node(fdt));
}
diff --git a/virtio/net.c b/virtio/net.c
index 6d1be65..419a5e3 100644
--- a/virtio/net.c
+++ b/virtio/net.c
@@ -52,6 +52,7 @@
int vhost_fd;
int tap_fd;
char tap_name[IFNAMSIZ];
+ bool tap_ufo;
int mode;
@@ -80,14 +81,12 @@
hdr->csum_offset = virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset);
}
-static void virtio_net_fix_rx_hdr(struct virtio_net_hdr_mrg_rxbuf *hdr, struct net_dev *ndev)
+static void virtio_net_fix_rx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
{
- hdr->hdr.hdr_len = virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.hdr_len);
- hdr->hdr.gso_size = virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.gso_size);
- hdr->hdr.csum_start = virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.csum_start);
- hdr->hdr.csum_offset = virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.csum_offset);
- if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
- hdr->num_buffers = virtio_host_to_guest_u16(&ndev->vdev, hdr->num_buffers);
+ hdr->hdr_len = virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr_len);
+ hdr->gso_size = virtio_host_to_guest_u16(&ndev->vdev, hdr->gso_size);
+ hdr->csum_start = virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_start);
+ hdr->csum_offset = virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_offset);
}
static void *virtio_net_rx_thread(void *p)
@@ -123,7 +122,7 @@
.iov_len = sizeof(buffer),
};
struct virtio_net_hdr_mrg_rxbuf *hdr;
- int i;
+ u16 num_buffers;
len = ndev->ops->rx(&dummy_iov, 1, ndev);
if (len < 0) {
@@ -132,7 +131,7 @@
goto out_err;
}
- copied = i = 0;
+ copied = num_buffers = 0;
head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
hdr = iov[0].iov_base;
while (copied < len) {
@@ -140,19 +139,20 @@
memcpy_toiovec(iov, buffer + copied, iovsize);
copied += iovsize;
- if (i++ == 0)
- virtio_net_fix_rx_hdr(hdr, ndev);
- if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) {
- u16 num_buffers = virtio_guest_to_host_u16(vq, hdr->num_buffers);
- hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers + 1);
- }
- virt_queue__set_used_elem(vq, head, iovsize);
+ virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++);
if (copied == len)
break;
while (!virt_queue__available(vq))
sleep(0);
head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
}
+
+ virtio_net_fix_rx_hdr(&hdr->hdr, ndev);
+ if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
+ hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers);
+
+ virt_queue__used_idx_advance(vq, num_buffers);
+
/* We should interrupt guest right now, otherwise latency is huge. */
if (virtio_queue__should_signal(vq))
ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
@@ -316,34 +316,11 @@
static bool virtio_net__tap_init(struct net_dev *ndev)
{
int sock = socket(AF_INET, SOCK_STREAM, 0);
- int offload, hdr_len;
+ int hdr_len;
struct sockaddr_in sin = {0};
struct ifreq ifr;
const struct virtio_net_params *params = ndev->params;
bool skipconf = !!params->tapif;
- bool macvtap = skipconf && (params->tapif[0] == '/');
- const char *tap_file = "/dev/net/tun";
-
- /* Did the user already gave us the FD? */
- if (params->fd) {
- ndev->tap_fd = params->fd;
- return 1;
- }
-
- if (macvtap)
- tap_file = params->tapif;
-
- ndev->tap_fd = open(tap_file, O_RDWR);
- if (ndev->tap_fd < 0) {
- pr_warning("Unable to open %s", tap_file);
- goto fail;
- }
-
- if (!macvtap &&
- virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
- pr_warning("Config tap device error. Are you root?");
- goto fail;
- }
hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
sizeof(struct virtio_net_hdr_mrg_rxbuf) :
@@ -351,12 +328,6 @@
if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
pr_warning("Config tap device TUNSETVNETHDRSZ error");
- offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
- if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
- pr_warning("Config tap device TUNSETOFFLOAD error");
- goto fail;
- }
-
if (strcmp(params->script, "none")) {
if (virtio_net_exec_script(params->script, ndev->tap_name) < 0)
goto fail;
@@ -394,6 +365,68 @@
return 0;
}
+/*
+ * Obtain the TAP file descriptor for @ndev and configure its offloads.
+ *
+ * The descriptor comes from params->fd when that is set; otherwise it is
+ * opened from /dev/net/tun, or from params->tapif when that starts with '/'
+ * (macvtap). TUNSETOFFLOAD is tried with TUN_F_UFO first and retried without
+ * it; ndev->tap_ufo records whether the UFO variant was accepted.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static bool virtio_net__tap_create(struct net_dev *ndev)
+{
+ int offload;
+ struct ifreq ifr;
+ const struct virtio_net_params *params = ndev->params;
+ bool macvtap = (!!params->tapif) && (params->tapif[0] == '/');
+
+ /* Did the user already give us the FD? */
+ if (params->fd)
+ ndev->tap_fd = params->fd;
+ else {
+ const char *tap_file = "/dev/net/tun";
+
+ /* Did the user ask us to use macvtap? */
+ if (macvtap)
+ tap_file = params->tapif;
+
+ ndev->tap_fd = open(tap_file, O_RDWR);
+ if (ndev->tap_fd < 0) {
+ pr_warning("Unable to open %s", tap_file);
+ return 0;
+ }
+ }
+
+ if (!macvtap &&
+ virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
+ pr_warning("Config tap device error. Are you root?");
+ goto fail;
+ }
+
+ /*
+ * UFO support was removed from the kernel in commit:
+ * ID: fb652fdfe83710da0ca13448a41b7ed027d0a984
+ * https://www.spinics.net/lists/netdev/msg443562.html
+ * In order to support older kernels without this commit,
+ * we set TUN_F_UFO in the offload flags by default to test whether
+ * the kernel supports UFO.
+ */
+ ndev->tap_ufo = true;
+ offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
+ if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
+ /*
+ * Is this failure caused by the kernel having removed UFO support?
+ * Try TUNSETOFFLOAD without TUN_F_UFO.
+ */
+ offload &= ~TUN_F_UFO;
+ if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
+ pr_warning("Config tap device TUNSETOFFLOAD error");
+ goto fail;
+ }
+ ndev->tap_ufo = false;
+ }
+
+ return 1;
+
+fail:
+ /*
+ * NOTE(review): every path reaching this label has a non-negative
+ * tap_fd unless params->fd itself was negative, so this condition also
+ * closes a descriptor that was supplied through params->fd — confirm
+ * that is intended.
+ */
+ if ((ndev->tap_fd >= 0) || (!params->fd) )
+ close(ndev->tap_fd);
+
+ return 0;
+}
+
static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
return writev(ndev->tap_fd, iov, out);
@@ -433,14 +466,13 @@
static u32 get_host_features(struct kvm *kvm, void *dev)
{
+ u32 features;
struct net_dev *ndev = dev;
- return 1UL << VIRTIO_NET_F_MAC
+ features = 1UL << VIRTIO_NET_F_MAC
| 1UL << VIRTIO_NET_F_CSUM
- | 1UL << VIRTIO_NET_F_HOST_UFO
| 1UL << VIRTIO_NET_F_HOST_TSO4
| 1UL << VIRTIO_NET_F_HOST_TSO6
- | 1UL << VIRTIO_NET_F_GUEST_UFO
| 1UL << VIRTIO_NET_F_GUEST_TSO4
| 1UL << VIRTIO_NET_F_GUEST_TSO6
| 1UL << VIRTIO_RING_F_EVENT_IDX
@@ -448,6 +480,16 @@
| 1UL << VIRTIO_NET_F_CTRL_VQ
| 1UL << VIRTIO_NET_F_MRG_RXBUF
| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
+
+ /*
+ * The UFO feature for host and guest only can be enabled when the
+ * kernel has TAP UFO support.
+ */
+ if (ndev->tap_ufo)
+ features |= (1UL << VIRTIO_NET_F_HOST_UFO
+ | 1UL << VIRTIO_NET_F_GUEST_UFO);
+
+ return features;
}
static int virtio_net__vhost_set_features(struct net_dev *ndev)
@@ -479,7 +521,8 @@
if (ndev->mode == NET_MODE_TAP) {
if (!virtio_net__tap_init(ndev))
- die_perror("You have requested a TAP device, but creation of one has failed because");
+ die_perror("TAP device initialized failed because");
+
if (ndev->vhost_fd &&
virtio_net__vhost_set_features(ndev) != 0)
die_perror("VHOST_SET_FEATURES failed");
@@ -531,7 +574,7 @@
}
if (queue->endian != VIRTIO_ENDIAN_HOST)
- die_perror("VHOST requires VIRTIO_ENDIAN_HOST");
+ die_perror("VHOST requires the same endianness in guest and host");
state.num = queue->vring.num;
r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
@@ -821,6 +864,8 @@
ndev->mode = params->mode;
if (ndev->mode == NET_MODE_TAP) {
ndev->ops = &tap_ops;
+ if (!virtio_net__tap_create(ndev))
+ die_perror("You have requested a TAP device, but creation of one has failed because");
} else {
ndev->info.host_ip = ntohl(inet_addr(params->host_ip));
ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip));
diff --git a/virtio/pci.c b/virtio/pci.c
index 90fcd64..4ce1111 100644
--- a/virtio/pci.c
+++ b/virtio/pci.c
@@ -152,11 +152,36 @@
return ret;
}
+/*
+ * Propagate a changed MSI-X table entry to the IRQ routing table.
+ *
+ * @msix_entry points at the start of the MSI-X table and @vecnum selects
+ * the entry that was written. The GSI tied to that vector is looked up
+ * (config vector first, then the per-queue vectors) and its route is
+ * refreshed with the entry's message via irq__update_msix_route().
+ * Nothing is done when no queue uses the vector or the GSI found is 0.
+ */
+static void update_msix_map(struct virtio_pci *vpci,
+ struct msix_table *msix_entry, u32 vecnum)
+{
+ u32 gsi, i;
+
+ /* Find the GSI number used for that vector */
+ if (vecnum == vpci->config_vector) {
+ gsi = vpci->config_gsi;
+ } else {
+ for (i = 0; i < VIRTIO_PCI_MAX_VQ; i++)
+ if (vpci->vq_vector[i] == vecnum)
+ break;
+ if (i == VIRTIO_PCI_MAX_VQ)
+ return;
+ gsi = vpci->gsis[i];
+ }
+
+ /* presumably 0 means no route has been set up for this vector yet — TODO confirm */
+ if (gsi == 0)
+ return;
+
+ msix_entry = &msix_entry[vecnum];
+ irq__update_msix_route(vpci->kvm, gsi, &msix_entry->msg);
+}
+
static bool virtio_pci__specific_io_out(struct kvm *kvm, struct virtio_device *vdev, u16 port,
void *data, int size, int offset)
{
struct virtio_pci *vpci = vdev->virtio;
- u32 config_offset, gsi, vec;
+ u32 config_offset, vec;
+ int gsi;
int type = virtio__get_dev_specific_field(offset - 20, virtio_pci__msix_enabled(vpci),
&config_offset);
if (type == VIRTIO_PCI_O_MSIX) {
@@ -166,21 +191,52 @@
if (vec == VIRTIO_MSI_NO_VECTOR)
break;
- gsi = irq__add_msix_route(kvm, &vpci->msix_table[vec].msg);
+ gsi = irq__add_msix_route(kvm,
+ &vpci->msix_table[vec].msg,
+ vpci->dev_hdr.dev_num << 3);
+ /*
+ * We don't need IRQ routing if we can use
+ * MSI injection via the KVM_SIGNAL_MSI ioctl.
+ */
+ if (gsi == -ENXIO &&
+ vpci->features & VIRTIO_PCI_F_SIGNAL_MSI)
+ break;
+
+ if (gsi < 0) {
+ die("failed to configure MSIs");
+ break;
+ }
vpci->config_gsi = gsi;
break;
case VIRTIO_MSI_QUEUE_VECTOR:
- vec = vpci->vq_vector[vpci->queue_selector] = ioport__read16(data);
+ vec = ioport__read16(data);
+ vpci->vq_vector[vpci->queue_selector] = vec;
if (vec == VIRTIO_MSI_NO_VECTOR)
break;
- gsi = irq__add_msix_route(kvm, &vpci->msix_table[vec].msg);
+ gsi = irq__add_msix_route(kvm,
+ &vpci->msix_table[vec].msg,
+ vpci->dev_hdr.dev_num << 3);
+ /*
+ * We don't need IRQ routing if we can use
+ * MSI injection via the KVM_SIGNAL_MSI ioctl.
+ */
+ if (gsi == -ENXIO &&
+ vpci->features & VIRTIO_PCI_F_SIGNAL_MSI)
+ break;
+
+ if (gsi < 0) {
+ die("failed to configure MSIs");
+ break;
+ }
+
vpci->gsis[vpci->queue_selector] = gsi;
if (vdev->ops->notify_vq_gsi)
vdev->ops->notify_vq_gsi(kvm, vpci->dev,
- vpci->queue_selector, gsi);
+ vpci->queue_selector,
+ gsi);
break;
};
@@ -252,24 +308,36 @@
u8 is_write, void *ptr)
{
struct virtio_pci *vpci = ptr;
- void *table;
- u32 offset;
+ struct msix_table *table;
+ int vecnum;
+ size_t offset;
if (addr > vpci->msix_io_block + PCI_IO_SIZE) {
- table = &vpci->msix_pba;
- offset = vpci->msix_io_block + PCI_IO_SIZE;
+ if (is_write)
+ return;
+ table = (struct msix_table *)&vpci->msix_pba;
+ offset = addr - (vpci->msix_io_block + PCI_IO_SIZE);
} else {
- table = &vpci->msix_table;
- offset = vpci->msix_io_block;
+ table = vpci->msix_table;
+ offset = addr - vpci->msix_io_block;
+ }
+ vecnum = offset / sizeof(struct msix_table);
+ offset = offset % sizeof(struct msix_table);
+
+ if (!is_write) {
+ memcpy(data, (void *)&table[vecnum] + offset, len);
+ return;
}
- if (is_write)
- memcpy(table + addr - offset, data, len);
- else
- memcpy(data, table + addr - offset, len);
+ memcpy((void *)&table[vecnum] + offset, data, len);
+
+ /* Did we just update the address or payload? */
+ if (offset < offsetof(struct msix_table, ctrl))
+ update_msix_map(vpci, table, vecnum);
}
-static void virtio_pci__signal_msi(struct kvm *kvm, struct virtio_pci *vpci, int vec)
+static void virtio_pci__signal_msi(struct kvm *kvm, struct virtio_pci *vpci,
+ int vec)
{
struct kvm_msi msi = {
.address_lo = vpci->msix_table[vec].msg.address_lo,
@@ -277,7 +345,12 @@
.data = vpci->msix_table[vec].msg.data,
};
- ioctl(kvm->vm_fd, KVM_SIGNAL_MSI, &msi);
+ if (kvm->msix_needs_devid) {
+ msi.flags = KVM_MSI_VALID_DEVID;
+ msi.devid = vpci->dev_hdr.dev_num << 3;
+ }
+
+ irq__signal_msi(kvm, &msi);
}
int virtio_pci__signal_vq(struct kvm *kvm, struct virtio_device *vdev, u32 vq)
@@ -415,7 +488,7 @@
vpci->pci_hdr.msix.pba_offset = cpu_to_le32(2 | PCI_IO_SIZE);
vpci->config_vector = 0;
- if (kvm__supports_extension(kvm, KVM_CAP_SIGNAL_MSI))
+ if (irq__can_signal_msi(kvm))
vpci->features |= VIRTIO_PCI_F_SIGNAL_MSI;
r = device__register(&vpci->dev_hdr);
diff --git a/x86/include/asm/kvm.h b/x86/include/asm/kvm.h
index cd54147..739c0c5 100644
--- a/x86/include/asm/kvm.h
+++ b/x86/include/asm/kvm.h
@@ -216,9 +216,9 @@
__u32 padding[3];
};
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
-#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
-#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX (1 << 0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC (1 << 1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT (1 << 2)
/* for KVM_SET_CPUID2 */
struct kvm_cpuid2 {
diff --git a/x86/include/kvm/fdt-arch.h b/x86/include/kvm/fdt-arch.h
new file mode 100644
index 0000000..eebd73f
--- /dev/null
+++ b/x86/include/kvm/fdt-arch.h
@@ -0,0 +1,6 @@
+#ifndef X86__FDT_ARCH_H
+#define X86__FDT_ARCH_H
+
+enum phandles {PHANDLE_RESERVED = 0, PHANDLES_MAX};
+
+#endif /* X86__FDT_ARCH_H */
diff --git a/x86/irq.c b/x86/irq.c
index 72177e7..db465a1 100644
--- a/x86/irq.c
+++ b/x86/irq.c
@@ -11,20 +11,15 @@
#include <stddef.h>
#include <stdlib.h>
-#define IRQ_MAX_GSI 64
#define IRQCHIP_MASTER 0
#define IRQCHIP_SLAVE 1
#define IRQCHIP_IOAPIC 2
-/* First 24 GSIs are routed between IRQCHIPs and IOAPICs */
-static u32 gsi = 24;
-
-struct kvm_irq_routing *irq_routing;
-
static int irq__add_routing(u32 gsi, u32 type, u32 irqchip, u32 pin)
{
- if (gsi >= IRQ_MAX_GSI)
- return -ENOSPC;
+ int r = irq__allocate_routing_entry();
+ if (r)
+ return r;
irq_routing->entries[irq_routing->nr++] =
(struct kvm_irq_routing_entry) {
@@ -41,11 +36,6 @@
{
int i, r;
- irq_routing = calloc(sizeof(struct kvm_irq_routing) +
- IRQ_MAX_GSI * sizeof(struct kvm_irq_routing_entry), 1);
- if (irq_routing == NULL)
- return -ENOMEM;
-
/* Hook first 8 GSIs to master IRQCHIP */
for (i = 0; i < 8; i++)
if (i != 2)
@@ -69,33 +59,8 @@
return errno;
}
+ next_gsi = i;
+
return 0;
}
dev_base_init(irq__init);
-
-int irq__exit(struct kvm *kvm)
-{
- free(irq_routing);
- return 0;
-}
-dev_base_exit(irq__exit);
-
-int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg)
-{
- int r;
-
- irq_routing->entries[irq_routing->nr++] =
- (struct kvm_irq_routing_entry) {
- .gsi = gsi,
- .type = KVM_IRQ_ROUTING_MSI,
- .u.msi.address_hi = msg->address_hi,
- .u.msi.address_lo = msg->address_lo,
- .u.msi.data = msg->data,
- };
-
- r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, irq_routing);
- if (r)
- return r;
-
- return gsi++;
-}
diff --git a/x86/kvm-cpu.c b/x86/kvm-cpu.c
index 5cc4e1e..b02ff65 100644
--- a/x86/kvm-cpu.c
+++ b/x86/kvm-cpu.c
@@ -4,7 +4,6 @@
#include "kvm/util.h"
#include "kvm/kvm.h"
-#include <asm/msr-index.h>
#include <asm/apicdef.h>
#include <linux/err.h>
#include <sys/ioctl.h>
@@ -136,6 +135,22 @@
return vcpu;
}
+#define MSR_IA32_SYSENTER_CS 0x00000174
+#define MSR_IA32_SYSENTER_ESP 0x00000175
+#define MSR_IA32_SYSENTER_EIP 0x00000176
+
+#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
+#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
+#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
+#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
+#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
+
+#define MSR_IA32_TSC 0x00000010
+#define MSR_IA32_MISC_ENABLE 0x000001a0
+
+#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0
+#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
+
#define KVM_MSR_ENTRY(_index, _data) \
(struct kvm_msr_entry) { .index = _index, .data = _data }
diff --git a/x86/kvm.c b/x86/kvm.c
index bfa04b8..d8751e9 100644
--- a/x86/kvm.c
+++ b/x86/kvm.c
@@ -124,9 +124,9 @@
strcpy(cmdline, "noapic noacpi pci=conf1 reboot=k panic=1 i8042.direct=1 "
"i8042.dumbkbd=1 i8042.nopnp=1");
if (video)
- strcat(cmdline, " video=vesafb console=tty0");
+ strcat(cmdline, " video=vesafb");
else
- strcat(cmdline, " console=ttyS0 earlyprintk=serial i8042.noaux=1");
+ strcat(cmdline, " earlyprintk=serial i8042.noaux=1");
}
/* Architecture-specific KVM init */