[PATCH v2 3/5] virtio-mmio: create a generic MSI irq domain
From: Liu Jiang Create a generic irq domain for all architectures which supports virtio-mmio. The device offering VIRTIO_F_MMIO_MSI feature bit can use this irq domain. Signed-off-by: Liu Jiang Co-developed-by: Zha Bin Signed-off-by: Zha Bin Co-developed-by: Jing Liu Signed-off-by: Jing Liu Co-developed-by: Chao Peng Signed-off-by: Chao Peng --- drivers/base/platform-msi.c | 4 +- drivers/virtio/Kconfig | 9 drivers/virtio/virtio_mmio_msi.h | 93 include/linux/msi.h | 1 + 4 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 drivers/virtio/virtio_mmio_msi.h diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index 8da314b..45752f1 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -31,12 +31,11 @@ struct platform_msi_priv_data { /* The devid allocator */ static DEFINE_IDA(platform_msi_devid_ida); -#ifdef GENERIC_MSI_DOMAIN_OPS /* * Convert an msi_desc to a globaly unique identifier (per-device * devid + msi_desc position in the msi_list). */ -static irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc) +irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc) { u32 devid; @@ -45,6 +44,7 @@ static irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc) return (devid << (32 - DEV_ID_SHIFT)) | desc->platform.msi_index; } +#ifdef GENERIC_MSI_DOMAIN_OPS static void platform_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) { arg->desc = desc; diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 078615c..551a9f7 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -84,6 +84,15 @@ config VIRTIO_MMIO If unsure, say N. +config VIRTIO_MMIO_MSI + bool "Memory-mapped virtio device MSI" + depends on VIRTIO_MMIO && GENERIC_MSI_IRQ_DOMAIN && GENERIC_MSI_IRQ + help +This allows device drivers to support msi interrupt handling for +virtio-mmio devices. It can improve performance greatly. + +If unsure, say N. 
+ config VIRTIO_MMIO_CMDLINE_DEVICES bool "Memory mapped virtio devices parameter parsing" depends on VIRTIO_MMIO diff --git a/drivers/virtio/virtio_mmio_msi.h b/drivers/virtio/virtio_mmio_msi.h new file mode 100644 index 000..27cb2af --- /dev/null +++ b/drivers/virtio/virtio_mmio_msi.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _DRIVERS_VIRTIO_VIRTIO_MMIO_MSI_H +#define _DRIVERS_VIRTIO_VIRTIO_MMIO_MSI_H + +#ifdef CONFIG_VIRTIO_MMIO_MSI + +#include +#include +#include +#include + +static irq_hw_number_t mmio_msi_hwirq; +static struct irq_domain *mmio_msi_domain; + +struct irq_domain *__weak arch_msi_root_irq_domain(void) +{ + return NULL; +} + +void __weak irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ +} + +static void mmio_msi_mask_irq(struct irq_data *data) +{ +} + +static void mmio_msi_unmask_irq(struct irq_data *data) +{ +} + +static struct irq_chip mmio_msi_controller = { + .name = "VIRTIO-MMIO-MSI", + .irq_mask = mmio_msi_mask_irq, + .irq_unmask = mmio_msi_unmask_irq, + .irq_ack= irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg= irq_msi_compose_msg, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static int mmio_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + memset(arg, 0, sizeof(*arg)); + return 0; +} + +static void mmio_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) +{ + mmio_msi_hwirq = platform_msi_calc_hwirq(desc); +} + +static irq_hw_number_t mmio_msi_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return mmio_msi_hwirq; +} + +static struct msi_domain_ops mmio_msi_domain_ops = { + .msi_prepare= mmio_msi_prepare, + .set_desc = mmio_msi_set_desc, + .get_hwirq = mmio_msi_get_hwirq, +}; + +static struct msi_domain_info mmio_msi_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | + MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_ACTIVATE_EARLY, + .ops= &mmio_msi_domain_ops, + .chip = 
&mmio_msi_controller, + .handler= handle_edge_irq, + .handler_name = "edge", +}; + +static inline void mmio_msi_create_irq_domain(void) +{ + struct fwnode_handle *fn; + struct irq_domain *parent = arch_msi_root_irq_domain(); + + fn = irq_domain_alloc_named_fwnode("VIRTIO-MMIO-MSI"); + if (fn && parent)
[PATCH v2 1/5] virtio-mmio: add notify feature for per-queue
From: Liu Jiang The standard virtio-mmio devices use notification register to signal backend. This will cause vmexits and slow down the performance when we passthrough the virtio-mmio devices to guest virtual machines. We proposed to update virtio over MMIO spec to add the per-queue notify feature VIRTIO_F_MMIO_NOTIFICATION[1]. It can allow the VMM to configure notify location for each queue. [1] https://lkml.org/lkml/2020/1/21/31 Signed-off-by: Liu Jiang Co-developed-by: Zha Bin Signed-off-by: Zha Bin Co-developed-by: Jing Liu Signed-off-by: Jing Liu Co-developed-by: Chao Peng Signed-off-by: Chao Peng --- drivers/virtio/virtio_mmio.c | 37 +++-- include/uapi/linux/virtio_config.h | 8 +++- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 97d5725..1733ab97 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -90,6 +90,9 @@ struct virtio_mmio_device { /* a list of queues so we can dispatch IRQs */ spinlock_t lock; struct list_head virtqueues; + + unsigned short notify_base; + unsigned short notify_multiplier; }; struct virtio_mmio_vq_info { @@ -98,6 +101,9 @@ struct virtio_mmio_vq_info { /* the list node for the virtqueues list */ struct list_head node; + + /* Notify Address*/ + unsigned int notify_addr; }; @@ -119,13 +125,23 @@ static u64 vm_get_features(struct virtio_device *vdev) return features; } +static void vm_transport_features(struct virtio_device *vdev, u64 features) +{ + if (features & BIT_ULL(VIRTIO_F_MMIO_NOTIFICATION)) + __virtio_set_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION); +} + static int vm_finalize_features(struct virtio_device *vdev) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + u64 features = vdev->features; /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); + /* Give virtio_mmio a chance to accept features. 
*/ + vm_transport_features(vdev, features); + /* Make sure there is are no mixed devices */ if (vm_dev->version == 2 && !__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) { @@ -272,10 +288,13 @@ static void vm_reset(struct virtio_device *vdev) static bool vm_notify(struct virtqueue *vq) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); + struct virtio_mmio_vq_info *info = vq->priv; - /* We write the queue's selector into the notification register to + /* We write the queue's selector into the Notify Address to * signal the other end */ - writel(vq->index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); + if (info) + writel(vq->index, vm_dev->base + info->notify_addr); + return true; } @@ -434,6 +453,12 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, vq->priv = info; info->vq = vq; + if (__virtio_test_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION)) + info->notify_addr = vm_dev->notify_base + + vm_dev->notify_multiplier * vq->index; + else + info->notify_addr = VIRTIO_MMIO_QUEUE_NOTIFY; + spin_lock_irqsave(&vm_dev->lock, flags); list_add(&info->node, &vm_dev->virtqueues); spin_unlock_irqrestore(&vm_dev->lock, flags); @@ -471,6 +496,14 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, return irq; } + if (__virtio_test_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION)) { + unsigned int notify = readl(vm_dev->base + + VIRTIO_MMIO_QUEUE_NOTIFY); + + vm_dev->notify_base = notify & 0xffff; + vm_dev->notify_multiplier = (notify >> 16) & 0xffff; + } + err = request_irq(irq, vm_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vm_dev); if (err) diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index ff8e7dc..5d93c01 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -52,7 +52,7 @@ * rest are per-device feature bits. 
*/ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 38 +#define VIRTIO_TRANSPORT_F_END 40 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -88,4 +88,10 @@ * Does the device support Single Root I/O Virtualization? */ #define VIRTIO_F_SR_IOV37 + +/* + * This feature indicates the enhanced notification support on MMIO transport + * layer. + */ +#define VIRTIO_F_MMIO_NOTIFICATION 39 #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ -- 1.8.3.1
[PATCH v2 0/5] virtio mmio specification enhancement
In a cloud native environment, we need a lightweight and secure system. It should benefit from the speed of containers and the security of VMs, which is classified as secure containers. The traditional solution for cloud VMs is Qemu. In fact, we don't need to pay for the legacy devices. Currently, more userspace VMMs, e.g. Qemu, Firecracker, Cloud Hypervisor and the Alibaba Cloud VMM called Dragonball, have begun to pay attention to a lightweight solution.

A lightweight VMM is suitable for cloud native infrastructure, which is designed for creating secure sandboxes to address the requirements of multi-tenancy. Meanwhile, with faster startup time and lower memory overhead, it makes it possible to launch thousands of microVMs on the same machine. This VMM minimizes the emulated devices and uses virtio-mmio to get a more lightweight transport layer. The virtio-mmio devices have less code than virtio-pci, which can decrease boot time and increase deployment density by customizing the kernel, such as setting pci=off. From another point of view, the minimal device can reduce the attack surface.

We have compared the number of files and the lines of code between virtio-mmio and virtio-pci.

                          Virtio-PCI    Virtio-MMIO
number of files(Linux)    161           1
lines of code(Linux)      78237         538
number of files(Qemu)     24            1
lines of code(Qemu)       8952          421

But the current standard virtio-mmio spec has some limitations: it only supports the legacy interrupt, which causes performance penalties. To address this limitation, we proposed to update the virtio-mmio spec with two new feature bits to support MSI interrupts and an enhanced notification mechanism[1], which can achieve the same performance as virtio-pci devices with only around 600 lines of code.

Here is the performance gain of MSI interrupts in virtio-mmio. Each case is repeated three times.
netperf -t TCP_RR -H 192.168.1.36 -l 30 -- -r 32,1024

            Virtio-PCI    Virtio-MMIO    Virtio-MMIO(MSI)
trans/s     9536          6939           9500
trans/s     9734          7029           9749
trans/s     9894          7095           9318

With the virtio spec proposal[1], other VMMs (e.g. Qemu) can also make use of the new features to get enhanced performance.

[1] https://lkml.org/lkml/2020/1/21/31

Change Log:
v1->v2
* Change version update to feature bit
* Add mask/unmask support
* Add two MSI sharing/non-sharing modes
* Create generic irq domain for all architectures

Liu Jiang (5):
  virtio-mmio: add notify feature for per-queue
  virtio-mmio: refactor common functionality
  virtio-mmio: create a generic MSI irq domain
  virtio-mmio: add MSI interrupt feature support
  x86: virtio-mmio: support virtio-mmio with MSI for x86

 arch/x86/kernel/apic/msi.c | 11 +-
 drivers/base/platform-msi.c | 4 +-
 drivers/virtio/Kconfig | 9 +
 drivers/virtio/virtio_mmio.c| 351
 drivers/virtio/virtio_mmio_common.h | 39
 drivers/virtio/virtio_mmio_msi.h| 175 ++
 include/linux/msi.h | 1 +
 include/uapi/linux/virtio_config.h | 13 +-
 include/uapi/linux/virtio_mmio.h| 31
 9 files changed, 596 insertions(+), 38 deletions(-)
 create mode 100644 drivers/virtio/virtio_mmio_common.h
 create mode 100644 drivers/virtio/virtio_mmio_msi.h

-- 
1.8.3.1
[PATCH v2 5/5] x86: virtio-mmio: support virtio-mmio with MSI for x86
From: Liu Jiang virtio-mmio supports a generic MSI irq domain for all archs. This patch adds the x86 architecture support. Signed-off-by: Liu Jiang Co-developed-by: Zha Bin Signed-off-by: Zha Bin Co-developed-by: Jing Liu Signed-off-by: Jing Liu Co-developed-by: Chao Peng Signed-off-by: Chao Peng --- arch/x86/kernel/apic/msi.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 159bd0c..2fcd602 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -45,7 +45,11 @@ static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg) MSI_DATA_VECTOR(cfg->vector); } -static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +/* + * x86 PCI-MSI/HPET/DMAR related method. + * Also can be used as arch specific method for virtio-mmio MSI. + */ +void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) { __irq_msi_compose_msg(irqd_cfg(data), msg); } @@ -166,6 +170,11 @@ static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg) return ret; } +struct irq_domain *arch_msi_root_irq_domain(void) +{ + return x86_vector_domain; +} + /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, * which implement the MSI or MSI-X Capability Structure. -- 1.8.3.1
[PATCH v2 2/5] virtio-mmio: refactor common functionality
From: Liu Jiang Common functionality is refactored into virtio_mmio_common.h in order to support MSI in a later patch of this series. Signed-off-by: Liu Jiang Co-developed-by: Zha Bin Signed-off-by: Zha Bin Co-developed-by: Jing Liu Signed-off-by: Jing Liu Co-developed-by: Chao Peng Signed-off-by: Chao Peng --- drivers/virtio/virtio_mmio.c| 21 + drivers/virtio/virtio_mmio_common.h | 31 +++ 2 files changed, 32 insertions(+), 20 deletions(-) create mode 100644 drivers/virtio/virtio_mmio_common.h diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 1733ab97..41e1c93 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -61,13 +61,12 @@ #include #include #include -#include #include #include -#include #include #include #include +#include "virtio_mmio_common.h" @@ -77,24 +76,6 @@ -#define to_virtio_mmio_device(_plat_dev) \ - container_of(_plat_dev, struct virtio_mmio_device, vdev) - -struct virtio_mmio_device { - struct virtio_device vdev; - struct platform_device *pdev; - - void __iomem *base; - unsigned long version; - - /* a list of queues so we can dispatch IRQs */ - spinlock_t lock; - struct list_head virtqueues; - - unsigned short notify_base; - unsigned short notify_multiplier; -}; - struct virtio_mmio_vq_info { /* the actual virtqueue */ struct virtqueue *vq; diff --git a/drivers/virtio/virtio_mmio_common.h b/drivers/virtio/virtio_mmio_common.h new file mode 100644 index 000..90cb304 --- /dev/null +++ b/drivers/virtio/virtio_mmio_common.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _DRIVERS_VIRTIO_VIRTIO_MMIO_COMMON_H +#define _DRIVERS_VIRTIO_VIRTIO_MMIO_COMMON_H +/* + * Virtio MMIO driver - common functionality for all device versions + * + * This module allows virtio devices to be used over a memory-mapped device. 
+ */ + +#include +#include + +#define to_virtio_mmio_device(_plat_dev) \ + container_of(_plat_dev, struct virtio_mmio_device, vdev) + +struct virtio_mmio_device { + struct virtio_device vdev; + struct platform_device *pdev; + + void __iomem *base; + unsigned long version; + + /* a list of queues so we can dispatch IRQs */ + spinlock_t lock; + struct list_head virtqueues; + + unsigned short notify_base; + unsigned short notify_multiplier; +}; + +#endif -- 1.8.3.1
[PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
From: Liu Jiang Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using virtio over mmio devices as a lightweight machine model for modern cloud. The standard virtio over MMIO transport layer only supports one legacy interrupt, which is much heavier than virtio over PCI transport layer using MSI. Legacy interrupt has long work path and causes specific VMExits in following cases, which would considerably slow down the performance: 1) read interrupt status register 2) update interrupt status register 3) write IOAPIC EOI register We proposed to add MSI support for virtio over MMIO via new feature bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance. With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI uses msi_sharing[1] to indicate the event and vector mapping. Bit 1 is 0: device uses non-sharing and fixed vector per event mapping. Bit 1 is 1: device uses sharing mode and dynamic mapping. [1] https://lkml.org/lkml/2020/1/21/31 Signed-off-by: Liu Jiang Co-developed-by: Zha Bin Signed-off-by: Zha Bin Co-developed-by: Jing Liu Signed-off-by: Jing Liu Co-developed-by: Chao Peng Signed-off-by: Chao Peng --- drivers/virtio/virtio_mmio.c| 299 ++-- drivers/virtio/virtio_mmio_common.h | 8 + drivers/virtio/virtio_mmio_msi.h| 82 ++ include/uapi/linux/virtio_config.h | 7 +- include/uapi/linux/virtio_mmio.h| 31 5 files changed, 411 insertions(+), 16 deletions(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 41e1c93..b24ce21 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -67,8 +67,7 @@ #include #include #include "virtio_mmio_common.h" - - +#include "virtio_mmio_msi.h" /* The alignment to use between consumer and producer parts of vring. * Currently hardcoded to the page size. 
*/ @@ -85,9 +84,13 @@ struct virtio_mmio_vq_info { /* Notify Address*/ unsigned int notify_addr; -}; + /* MSI vector (or none) */ + unsigned int msi_vector; +}; +static void vm_free_msi_irqs(struct virtio_device *vdev); +static int vm_request_msi_vectors(struct virtio_device *vdev, int nirqs); /* Configuration interface */ @@ -110,6 +113,9 @@ static void vm_transport_features(struct virtio_device *vdev, u64 features) { if (features & BIT_ULL(VIRTIO_F_MMIO_NOTIFICATION)) __virtio_set_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION); + + if (features & BIT_ULL(VIRTIO_F_MMIO_MSI)) + __virtio_set_bit(vdev, VIRTIO_F_MMIO_MSI); } static int vm_finalize_features(struct virtio_device *vdev) @@ -307,7 +313,33 @@ static irqreturn_t vm_interrupt(int irq, void *opaque) return ret; } +static irqreturn_t vm_vring_interrupt(int irq, void *opaque) +{ + struct virtio_mmio_device *vm_dev = opaque; + struct virtio_mmio_vq_info *info; + irqreturn_t ret = IRQ_NONE; + unsigned long flags; + + spin_lock_irqsave(&vm_dev->lock, flags); + list_for_each_entry(info, &vm_dev->virtqueues, node) { + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + spin_unlock_irqrestore(&vm_dev->lock, flags); + + return ret; +} + + +/* Handle a configuration change */ +static irqreturn_t vm_config_changed(int irq, void *opaque) +{ + struct virtio_mmio_device *vm_dev = opaque; + + virtio_config_changed(&vm_dev->vdev); + return IRQ_HANDLED; +} static void vm_del_vq(struct virtqueue *vq) { @@ -316,6 +348,15 @@ static void vm_del_vq(struct virtqueue *vq) unsigned long flags; unsigned int index = vq->index; + if (vm_dev->msi_enabled && !vm_dev->msi_share) { + if (info->msi_vector != VIRTIO_MMIO_MSI_NO_VECTOR) { + int irq = mmio_msi_irq_vector(&vq->vdev->dev, + info->msi_vector); + + free_irq(irq, vq); + } + } + spin_lock_irqsave(&vm_dev->lock, flags); list_del(&info->node); spin_unlock_irqrestore(&vm_dev->lock, flags); @@ -334,20 +375,56 @@ static void vm_del_vq(struct virtqueue *vq) kfree(info); } 
-static void vm_del_vqs(struct virtio_device *vdev) +static void vm_free_irqs(struct virtio_device *vdev) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + + if (vm_dev->msi_enabled) + vm_free_msi_irqs(vdev); + else + free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev); +} + +static void vm_del_vqs(struct virtio_device *vdev) +{ struct virtqueue *vq, *n; list_for_each_entry_safe(vq, n, &vdev->vqs, list) vm_del_vq(vq); - free_irq(platform_get_irq(vm_dev-