In split irqchip mode, the IOAPIC is emulated in user space, so we only update the kernel IRQ routes when an entry changes. When IR (interrupt remapping) is enabled, we directly update the kernel with the translated messages. The kernel routes thus work just like a kernel-side cache for the remapping entries.
Since KVM irqfd is using kernel gsi routes to deliver interrupts, as long as we can support split irqchip, we will support irqfd as well. Also, since kernel gsi routes will cache translated interrupts, irqfd delivery will not suffer from any performance impact due to IR. And, since we supported irqfd, vhost devices will be able to work seamlessly with IR now. Logically this should contain both vhost-net and vhost-user case. Signed-off-by: Peter Xu <pet...@redhat.com> --- hw/i386/intel_iommu.c | 7 +++++++ include/hw/i386/intel_iommu.h | 1 + include/hw/i386/x86-iommu.h | 4 ++++ target-i386/kvm.c | 27 +++++++++++++++++++++++++++ trace-events | 3 +++ 5 files changed, 42 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index e832780..a6bfd66 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2146,6 +2146,12 @@ do_not_translate: return 0; } +static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src, + MSIMessage *dst, uint16_t sid) +{ + return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu), src, dst); +} + static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr, uint64_t *data, unsigned size, MemTxAttrs attrs) @@ -2374,6 +2380,7 @@ static void vtd_class_init(ObjectClass *klass, void *data) dc->props = vtd_properties; x86_class->realize = vtd_realize; x86_class->find_add_as = vtd_find_add_as; + x86_class->int_remap = vtd_int_remap; } static const TypeInfo vtd_info = { diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index b3f17d7..3bca390 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -26,6 +26,7 @@ #include "hw/i386/x86-iommu.h" #include "hw/i386/ioapic.h" #include "hw/pci/msi.h" +#include "hw/sysbus.h" #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu" #define INTEL_IOMMU_DEVICE(obj) \ diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index 2070cd1..1eb62cf 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -22,6 
+22,7 @@ #include "hw/sysbus.h" #include "exec/memory.h" +#include "hw/pci/pci.h" #define TYPE_X86_IOMMU_DEVICE ("x86-iommu") #define X86_IOMMU_DEVICE(obj) \ @@ -43,6 +44,9 @@ struct X86IOMMUClass { DeviceRealize realize; /* Find/Add IOMMU address space for specific PCI device */ AddressSpace *(*find_add_as)(X86IOMMUState *s, PCIBus *bus, int devfn); + /* MSI-based interrupt remapping */ + int (*int_remap)(X86IOMMUState *iommu, MSIMessage *src, + MSIMessage *dst, uint16_t sid); }; struct X86IOMMUState { diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7b3667a..ef10ccb 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -36,6 +36,7 @@ #include "hw/i386/apic.h" #include "hw/i386/apic_internal.h" #include "hw/i386/apic-msidef.h" +#include "hw/i386/intel_iommu.h" #include "exec/ioport.h" #include "standard-headers/asm-x86/hyperv.h" @@ -43,6 +44,7 @@ #include "hw/pci/msi.h" #include "migration/migration.h" #include "exec/memattrs.h" +#include "trace.h" //#define DEBUG_KVM @@ -3318,6 +3320,31 @@ int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id) int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, uint64_t address, uint32_t data, PCIDevice *dev) { + X86IOMMUState *iommu = x86_iommu_get_default(); + + if (iommu) { + int ret; + MSIMessage src, dst; + X86IOMMUClass *class = X86_IOMMU_GET_CLASS(iommu); + + src.address = route->u.msi.address_hi; + src.address <<= VTD_MSI_ADDR_HI_SHIFT; + src.address |= route->u.msi.address_lo; + src.data = route->u.msi.data; + + ret = class->int_remap(iommu, &src, &dst, dev ? 
\ + pci_requester_id(dev) : \ + X86_IOMMU_SID_INVALID); + if (ret) { + trace_kvm_x86_fixup_msi_error(route->gsi); + return 1; + } + + route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT; + route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK; + route->u.msi.data = dst.data; + } + return 0; } diff --git a/trace-events b/trace-events index b27d1da..54c0d41 100644 --- a/trace-events +++ b/trace-events @@ -1946,3 +1946,6 @@ gic_set_irq(int irq, int level, int cpumask, int target) "irq %d level %d cpumas gic_update_bestirq(int cpu, int irq, int prio, int priority_mask, int running_priority) "cpu %d irq %d priority %d cpu priority mask %d cpu running priority %d" gic_update_set_irq(int cpu, const char *name, int level) "cpu[%d]: %s = %d" gic_acknowledge_irq(int cpu, int irq) "cpu %d acknowledged irq %d" + +# target-i386/kvm.c +kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" PRIu32 -- 2.4.11