On 2019/8/8 20:07, Ben Luo wrote:
> When userspace (e.g. qemu) triggers a switch between KVM
> irqfd and userspace eventfd, only dev_id of irq action
> (i.e. the "trigger" in this patch's context) will be
> changed, but a free-then-request-irq action is taken in
> current code. And, irq affinity setting in VM will also
> trigger a free-then-request-irq action, which actully
> changes nothing, but only fires a producer re-registration
> to update irte in case that posted-interrupt is in use.
> 
> This patch makes use of update_irq_devid() and optimize
> both cases above, which reduces the risk of losing interrupt
> and also cuts some overhead.
> 
> Signed-off-by: Ben Luo <[email protected]>
> Reviewed-by: Liu Jiang <[email protected]>
> ---
>  drivers/vfio/pci/vfio_pci_intrs.c | 100 
> +++++++++++++++++++++++---------------
>  1 file changed, 62 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
> b/drivers/vfio/pci/vfio_pci_intrs.c
> index 3fa3f72..1323dc8 100644
> --- a/drivers/vfio/pci/vfio_pci_intrs.c
> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
> @@ -285,69 +285,93 @@ static int vfio_msi_set_vector_signal(struct 
> vfio_pci_device *vdev,
>                                     int vector, int fd, bool msix)
>  {
>       struct pci_dev *pdev = vdev->pdev;
> -     struct eventfd_ctx *trigger;
> +     struct eventfd_ctx *trigger = NULL;

struct eventfd_ctx *trigger = NULL;
struct pci_dev *pdev = vdev->pdev;

to maintain reverse christmas tree?

>       int irq, ret;
>  
>       if (vector < 0 || vector >= vdev->num_ctx)
>               return -EINVAL;
>  
> +     if (fd >= 0) {
> +             trigger = eventfd_ctx_fdget(fd);
> +             if (IS_ERR(trigger))
> +                     return PTR_ERR(trigger);

It seems vdev->ctx[vector].trigger is freed even if  fd < 0 before
this patch. If it return here, vdev->ctx[vector].trigger is not free?

> +     }
> +
>       irq = pci_irq_vector(pdev, vector);
>  
>       if (vdev->ctx[vector].trigger) {
> -             free_irq(irq, vdev->ctx[vector].trigger);
> -             irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
> -             kfree(vdev->ctx[vector].name);
> -             eventfd_ctx_put(vdev->ctx[vector].trigger);
> -             vdev->ctx[vector].trigger = NULL;
> +             if (vdev->ctx[vector].trigger != trigger) {
> +                     if (trigger) {
> +                             ret = update_irq_devid(irq,
> +                                             vdev->ctx[vector].trigger, 
> trigger);
> +                             if (unlikely(ret)) {
> +                                     dev_info(&pdev->dev,
> +                                                     "update_irq_devid %d 
> (token %p) fails: %d\n",
> +                                                     irq, 
> vdev->ctx[vector].trigger, ret);
> +                                     eventfd_ctx_put(trigger);
> +                                     return ret;
> +                             }
> +                             
> irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
> +                             eventfd_ctx_put(vdev->ctx[vector].trigger);
> +                             vdev->ctx[vector].producer.token = trigger;
> +                             vdev->ctx[vector].trigger = trigger;
> +                     } else {
> +                             free_irq(irq, vdev->ctx[vector].trigger);
> +                             
> irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
> +                             kfree(vdev->ctx[vector].name);
> +                             eventfd_ctx_put(vdev->ctx[vector].trigger);
> +                             vdev->ctx[vector].trigger = NULL;
> +                     }
> +             } else
> +                     
> irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
>       }

Maybe adjust it a litte to reduce indent and to improve readability?

        if (vdev->ctx[vector].trigger && vdev->ctx[vector].trigger == trigger) {
                irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
        } else if (vdev->ctx[vector].trigger && !trigger) {
                free_irq(irq, vdev->ctx[vector].trigger);
                irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
                kfree(vdev->ctx[vector].name);
                eventfd_ctx_put(vdev->ctx[vector].trigger);
                vdev->ctx[vector].trigger = NULL;
        } else if (vdev->ctx[vector].trigger) {
                ret = update_irq_devid(irq, vdev->ctx[vector].trigger, trigger);
                if (unlikely(ret)) {
                        dev_info(&pdev->dev,
                                 "update_irq_devid %d (token %p) fails: %d\n",
                                 irq, vdev->ctx[vector].trigger, ret);
                                 eventfd_ctx_put(trigger);
                                 return ret;
                }
                irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
                eventfd_ctx_put(vdev->ctx[vector].trigger);
                vdev->ctx[vector].producer.token = trigger;
                vdev->ctx[vector].trigger = trigger;
        }


>  
>       if (fd < 0)
>               return 0;
>  
> -     vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
> -                                        msix ? "x" : "", vector,
> -                                        pci_name(pdev));
> -     if (!vdev->ctx[vector].name)
> -             return -ENOMEM;
> +     if (vdev->ctx[vector].trigger == NULL) {

It may be common to use the below check to do NULL checking:
If (!vdev->ctx[vector].trigger)


> +             vdev->ctx[vector].name = kasprintf(GFP_KERNEL, 
> "vfio-msi%s[%d](%s)",
> +                                                msix ? "x" : "", vector,
> +                                                pci_name(pdev));
> +             if (!vdev->ctx[vector].name) {
> +                     eventfd_ctx_put(trigger);
> +                     return -ENOMEM;
> +             }
>  
> -     trigger = eventfd_ctx_fdget(fd);
> -     if (IS_ERR(trigger)) {
> -             kfree(vdev->ctx[vector].name);
> -             return PTR_ERR(trigger);
> -     }
> +             /*
> +              * The MSIx vector table resides in device memory which may be 
> cleared
> +              * via backdoor resets. We don't allow direct access to the 
> vector
> +              * table so even if a userspace driver attempts to save/restore 
> around
> +              * such a reset it would be unsuccessful. To avoid this, 
> restore the
> +              * cached value of the message prior to enabling.
> +              */
> +             if (msix) {
> +                     struct msi_msg msg;
>  
> -     /*
> -      * The MSIx vector table resides in device memory which may be cleared
> -      * via backdoor resets. We don't allow direct access to the vector
> -      * table so even if a userspace driver attempts to save/restore around
> -      * such a reset it would be unsuccessful. To avoid this, restore the
> -      * cached value of the message prior to enabling.
> -      */
> -     if (msix) {
> -             struct msi_msg msg;
> +                     get_cached_msi_msg(irq, &msg);
> +                     pci_write_msi_msg(irq, &msg);
> +             }
>  
> -             get_cached_msi_msg(irq, &msg);
> -             pci_write_msi_msg(irq, &msg);
> -     }
> +             ret = request_irq(irq, vfio_msihandler, 0,
> +                               vdev->ctx[vector].name, trigger);
> +             if (ret) {
> +                     kfree(vdev->ctx[vector].name);
> +                     eventfd_ctx_put(trigger);
> +                     return ret;
> +             }
>  
> -     ret = request_irq(irq, vfio_msihandler, 0,
> -                       vdev->ctx[vector].name, trigger);
> -     if (ret) {
> -             kfree(vdev->ctx[vector].name);
> -             eventfd_ctx_put(trigger);
> -             return ret;
> +             vdev->ctx[vector].producer.token = trigger;
> +             vdev->ctx[vector].producer.irq = irq;
> +             vdev->ctx[vector].trigger = trigger;
>       }
>  
> -     vdev->ctx[vector].producer.token = trigger;
> -     vdev->ctx[vector].producer.irq = irq;
> +     /* always update irte for posted mode */
>       ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
>       if (unlikely(ret))
>               dev_info(&pdev->dev,
>               "irq bypass producer (token %p) registration fails: %d\n",
>               vdev->ctx[vector].producer.token, ret);
>  
> -     vdev->ctx[vector].trigger = trigger;
> -
>       return 0;
>  }
>  
> 

Reply via email to