Re: [PATCH v5 3/3] vfio/pci: make use of irq_update_devid and optimize irq ops
在 2019/8/31 上午4:06, Alex Williamson 写道: On Fri, 30 Aug 2019 16:42:06 +0800 Ben Luo wrote: When userspace (e.g. qemu) triggers a switch between KVM irqfd and userspace eventfd, only dev_id of irqaction (i.e. the "trigger" in this patch's context) will be changed, but a free-then-request-irq action is taken in current code. And, irq affinity setting in VM will also trigger a free-then-request-irq action, which actually changes nothing, but only need to bounce the irqbypass registraion in case that posted-interrupt is in use. This patch makes use of irq_update_devid() and optimize both cases above, which reduces the risk of losing interrupt and also cuts some overhead. Signed-off-by: Ben Luo --- drivers/vfio/pci/vfio_pci_intrs.c | 124 ++ 1 file changed, 87 insertions(+), 37 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 3fa3f72..d3a93d7 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -284,70 +284,120 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, int vector, int fd, bool msix) { + struct eventfd_ctx *trigger = NULL; struct pci_dev *pdev = vdev->pdev; - struct eventfd_ctx *trigger; int irq, ret; if (vector < 0 || vector >= vdev->num_ctx) return -EINVAL; + if (fd >= 0) { + trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(trigger)) { + /* oops, going to disable this interrupt */ + dev_info(>dev, +"get ctx error on bad fd: %d for vector:%d\n", +fd, vector); I think a user could trigger this maliciously as a denial of service by simply providing a bogus file descriptor. The user is informed of the error by the return value, why do we need to spam the logs? Ah, you are right, I will remove this log in next version + } + } + irq = pci_irq_vector(pdev, vector); + /* +* 'trigger' is NULL or invalid, disable the interrupt +* 'trigger' is same as before, only bounce the bypass registration +* 'trigger' is a new invalid one, update it to irqaction and other s/invalid/valid/ sorry for typo :) +* data structures referencing to the old one; fallback to disable +* the interrupt on error +*/ if (vdev->ctx[vector].trigger) { - free_irq(irq, vdev->ctx[vector].trigger); + /* +* even if the trigger is unchanged we need to bounce the +* interrupt bypass connection to allow affinity changes in +* the guest to be realized. +*/ irq_bypass_unregister_producer(>ctx[vector].producer); - kfree(vdev->ctx[vector].name); - eventfd_ctx_put(vdev->ctx[vector].trigger); - vdev->ctx[vector].trigger = NULL; + + if (vdev->ctx[vector].trigger == trigger) { + /* avoid duplicated referencing to the same trigger */ + eventfd_ctx_put(trigger); + + } else if (trigger && !IS_ERR(trigger)) { + ret = irq_update_devid(irq, + vdev->ctx[vector].trigger, trigger); + if (unlikely(ret)) { + dev_info(>dev, +"update devid of %d (token %p) failed: %d\n", +irq, vdev->ctx[vector].trigger, ret); + eventfd_ctx_put(trigger); + free_irq(irq, vdev->ctx[vector].trigger); + kfree(vdev->ctx[vector].name); + eventfd_ctx_put(vdev->ctx[vector].trigger); + vdev->ctx[vector].trigger = NULL; + return ret; + } + eventfd_ctx_put(vdev->ctx[vector].trigger); + vdev->ctx[vector].producer.token = trigger; + vdev->ctx[vector].trigger = trigger; + + } else { + free_irq(irq, vdev->ctx[vector].trigger); + kfree(vdev->ctx[vector].name); + eventfd_ctx_put(vdev->ctx[vector].trigger); + vdev->ctx[vector].trigger = NULL; + } } if (fd < 0) return 0; + else if (IS_ERR(trigger)) + return PTR_ERR(trigger); - vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)", - msix ? "x" : "", vector, -
Re: [PATCH v5 3/3] vfio/pci: make use of irq_update_devid and optimize irq ops
On Fri, 30 Aug 2019 16:42:06 +0800 Ben Luo wrote: > When userspace (e.g. qemu) triggers a switch between KVM > irqfd and userspace eventfd, only dev_id of irqaction > (i.e. the "trigger" in this patch's context) will be > changed, but a free-then-request-irq action is taken in > current code. And, irq affinity setting in VM will also > trigger a free-then-request-irq action, which actually > changes nothing, but only need to bounce the irqbypass > registraion in case that posted-interrupt is in use. > > This patch makes use of irq_update_devid() and optimize > both cases above, which reduces the risk of losing interrupt > and also cuts some overhead. > > Signed-off-by: Ben Luo > --- > drivers/vfio/pci/vfio_pci_intrs.c | 124 > ++ > 1 file changed, 87 insertions(+), 37 deletions(-) > > diff --git a/drivers/vfio/pci/vfio_pci_intrs.c > b/drivers/vfio/pci/vfio_pci_intrs.c > index 3fa3f72..d3a93d7 100644 > --- a/drivers/vfio/pci/vfio_pci_intrs.c > +++ b/drivers/vfio/pci/vfio_pci_intrs.c > @@ -284,70 +284,120 @@ static int vfio_msi_enable(struct vfio_pci_device > *vdev, int nvec, bool msix) > static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, > int vector, int fd, bool msix) > { > + struct eventfd_ctx *trigger = NULL; > struct pci_dev *pdev = vdev->pdev; > - struct eventfd_ctx *trigger; > int irq, ret; > > if (vector < 0 || vector >= vdev->num_ctx) > return -EINVAL; > > + if (fd >= 0) { > + trigger = eventfd_ctx_fdget(fd); > + if (IS_ERR(trigger)) { > + /* oops, going to disable this interrupt */ > + dev_info(>dev, > + "get ctx error on bad fd: %d for vector:%d\n", > + fd, vector); I think a user could trigger this maliciously as a denial of service by simply providing a bogus file descriptor. The user is informed of the error by the return value, why do we need to spam the logs? > + } > + } > + > irq = pci_irq_vector(pdev, vector); > > + /* > + * 'trigger' is NULL or invalid, disable the interrupt > + * 'trigger' is same as before, only bounce the bypass registration > + * 'trigger' is a new invalid one, update it to irqaction and other s/invalid/valid/ > + * data structures referencing to the old one; fallback to disable > + * the interrupt on error > + */ > if (vdev->ctx[vector].trigger) { > - free_irq(irq, vdev->ctx[vector].trigger); > + /* > + * even if the trigger is unchanged we need to bounce the > + * interrupt bypass connection to allow affinity changes in > + * the guest to be realized. > + */ > irq_bypass_unregister_producer(>ctx[vector].producer); > - kfree(vdev->ctx[vector].name); > - eventfd_ctx_put(vdev->ctx[vector].trigger); > - vdev->ctx[vector].trigger = NULL; > + > + if (vdev->ctx[vector].trigger == trigger) { > + /* avoid duplicated referencing to the same trigger */ > + eventfd_ctx_put(trigger); > + > + } else if (trigger && !IS_ERR(trigger)) { > + ret = irq_update_devid(irq, > +vdev->ctx[vector].trigger, > trigger); > + if (unlikely(ret)) { > + dev_info(>dev, > + "update devid of %d (token %p) failed: > %d\n", > + irq, vdev->ctx[vector].trigger, ret); > + eventfd_ctx_put(trigger); > + free_irq(irq, vdev->ctx[vector].trigger); > + kfree(vdev->ctx[vector].name); > + eventfd_ctx_put(vdev->ctx[vector].trigger); > + vdev->ctx[vector].trigger = NULL; > + return ret; > + } > + eventfd_ctx_put(vdev->ctx[vector].trigger); > + vdev->ctx[vector].producer.token = trigger; > + vdev->ctx[vector].trigger = trigger; > + > + } else { > + free_irq(irq, vdev->ctx[vector].trigger); > + kfree(vdev->ctx[vector].name); > + eventfd_ctx_put(vdev->ctx[vector].trigger); > + vdev->ctx[vector].trigger = NULL; > + } > } > > if (fd < 0) > return 0; > + else if (IS_ERR(trigger)) > + return PTR_ERR(trigger); > > - vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)", > -msix ? "x" : "", vector, > -pci_name(pdev)); > - if
[PATCH v5 3/3] vfio/pci: make use of irq_update_devid and optimize irq ops
When userspace (e.g. qemu) triggers a switch between KVM irqfd and userspace eventfd, only dev_id of irqaction (i.e. the "trigger" in this patch's context) will be changed, but a free-then-request-irq action is taken in current code. And, irq affinity setting in VM will also trigger a free-then-request-irq action, which actually changes nothing, but only need to bounce the irqbypass registraion in case that posted-interrupt is in use. This patch makes use of irq_update_devid() and optimize both cases above, which reduces the risk of losing interrupt and also cuts some overhead. Signed-off-by: Ben Luo --- drivers/vfio/pci/vfio_pci_intrs.c | 124 ++ 1 file changed, 87 insertions(+), 37 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 3fa3f72..d3a93d7 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -284,70 +284,120 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, int vector, int fd, bool msix) { + struct eventfd_ctx *trigger = NULL; struct pci_dev *pdev = vdev->pdev; - struct eventfd_ctx *trigger; int irq, ret; if (vector < 0 || vector >= vdev->num_ctx) return -EINVAL; + if (fd >= 0) { + trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(trigger)) { + /* oops, going to disable this interrupt */ + dev_info(>dev, +"get ctx error on bad fd: %d for vector:%d\n", +fd, vector); + } + } + irq = pci_irq_vector(pdev, vector); + /* +* 'trigger' is NULL or invalid, disable the interrupt +* 'trigger' is same as before, only bounce the bypass registration +* 'trigger' is a new invalid one, update it to irqaction and other +* data structures referencing to the old one; fallback to disable +* the interrupt on error +*/ if (vdev->ctx[vector].trigger) { - free_irq(irq, vdev->ctx[vector].trigger); + /* +* even if the trigger is unchanged we need to bounce the +* interrupt bypass connection to allow affinity changes in +* the guest to be realized. +*/ irq_bypass_unregister_producer(>ctx[vector].producer); - kfree(vdev->ctx[vector].name); - eventfd_ctx_put(vdev->ctx[vector].trigger); - vdev->ctx[vector].trigger = NULL; + + if (vdev->ctx[vector].trigger == trigger) { + /* avoid duplicated referencing to the same trigger */ + eventfd_ctx_put(trigger); + + } else if (trigger && !IS_ERR(trigger)) { + ret = irq_update_devid(irq, + vdev->ctx[vector].trigger, trigger); + if (unlikely(ret)) { + dev_info(>dev, +"update devid of %d (token %p) failed: %d\n", +irq, vdev->ctx[vector].trigger, ret); + eventfd_ctx_put(trigger); + free_irq(irq, vdev->ctx[vector].trigger); + kfree(vdev->ctx[vector].name); + eventfd_ctx_put(vdev->ctx[vector].trigger); + vdev->ctx[vector].trigger = NULL; + return ret; + } + eventfd_ctx_put(vdev->ctx[vector].trigger); + vdev->ctx[vector].producer.token = trigger; + vdev->ctx[vector].trigger = trigger; + + } else { + free_irq(irq, vdev->ctx[vector].trigger); + kfree(vdev->ctx[vector].name); + eventfd_ctx_put(vdev->ctx[vector].trigger); + vdev->ctx[vector].trigger = NULL; + } } if (fd < 0) return 0; + else if (IS_ERR(trigger)) + return PTR_ERR(trigger); - vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)", - msix ? "x" : "", vector, - pci_name(pdev)); - if (!vdev->ctx[vector].name) - return -ENOMEM; + if (!vdev->ctx[vector].trigger) { + vdev->ctx[vector].name = kasprintf(GFP_KERNEL, + "vfio-msi%s[%d](%s)", + msix ? "x" : "", vector, +