RE: [PATCH 1/2] VFIO-AER: Vfio-pci driver changes for supporting AER
-Original Message- From: Alex Williamson [mailto:alex.william...@redhat.com] Sent: Wednesday, January 09, 2013 11:05 AM To: Pandarathil, Vijaymohan R Cc: Bjorn Helgaas; Gleb Natapov; kvm@vger.kernel.org; qemu- de...@nongnu.org; linux-...@vger.kernel.org; linux-ker...@vger.kernel.org Subject: Re: [PATCH 1/2] VFIO-AER: Vfio-pci driver changes for supporting AER On Wed, 2013-01-09 at 10:52 -0700, Alex Williamson wrote: On Wed, 2013-01-09 at 06:26 +, Pandarathil, Vijaymohan R wrote: - New ioctl which is used to pass the eventfd that is signaled when an error occurs in the vfio_pci_device - Register pci_error_handler for the vfio_pci driver - When the device encounters an error, the error handler registered by the vfio_pci driver gets invoked by the AER infrastructure - In the error handler, signal the eventfd registered for the device. - This results in the qemu eventfd handler getting invoked and appropriate action taken for the guest. Signed-off-by: Vijay Mohan Pandarathil vijaymohan.pandarat...@hp.com --- drivers/vfio/pci/vfio_pci.c | 29 + drivers/vfio/pci/vfio_pci_private.h | 1 + drivers/vfio/vfio.c | 8 include/linux/vfio.h| 1 + include/uapi/linux/vfio.h | 9 + 5 files changed, 48 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6c11994..4ae9526 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -207,6 +207,8 @@ static long vfio_pci_ioctl(void *device_data, if (vdev-reset_works) info.flags |= VFIO_DEVICE_FLAGS_RESET; + info.flags |= VFIO_DEVICE_FLAGS_AER_NOTIFY; + This appears to be a PCI specific flag, so the name should include _PCI_. We also support non-PCIe devices and it seems like it would be possible to not have AER support available, so shouldn't this be conditional? Will do that. info.num_regions = VFIO_PCI_NUM_REGIONS; info.num_irqs = VFIO_PCI_NUM_IRQS; @@ -348,6 +350,19 @@ static long vfio_pci_ioctl(void *device_data, return ret; + } else if (cmd == VFIO_DEVICE_SET_ERRFD) { + int32_t fd = (int32_t)arg; + + if (fd 0) + return -EINVAL; + + vdev-err_trigger = eventfd_ctx_fdget(fd); + + if (IS_ERR(vdev-err_trigger)) + return PTR_ERR(vdev-err_trigger); + + return 0; + I'm not sure why we wouldn't describe this as just another interrupt from the device and configure it via SET_IRQ. This ioctl has very limited use and doesn't follow any of the conventions of all the other vfio ioctls. I thought this was a fairly simple ioctl to implement this way. Moreover, there is no device interrupt involved. Let me know if you really want to model it as a SET_IRQ ioctl. } else if (cmd == VFIO_DEVICE_RESET) return vdev-reset_works ? pci_reset_function(vdev-pdev) : -EINVAL; @@ -527,11 +542,25 @@ static void vfio_pci_remove(struct pci_dev *pdev) kfree(vdev); } +static pci_ers_result_t vfio_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct vfio_pci_device *vdev = vfio_get_vdev(pdev-dev); + + eventfd_signal(vdev-err_trigger, 1); + return PCI_ERS_RESULT_CAN_RECOVER; +} What if err_trigger hasn't been set? Will fix that. + +static const struct pci_error_handlers vfio_err_handlers = { + .error_detected = vfio_err_detected, +}; + static struct pci_driver vfio_pci_driver = { .name = vfio-pci, .id_table = NULL, /* only dynamic ids */ .probe = vfio_pci_probe, .remove = vfio_pci_remove, + .err_handler= vfio_err_handlers, }; static void __exit vfio_pci_cleanup(void) diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 611827c..daee62f 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -55,6 +55,7 @@ struct vfio_pci_device { boolbardirty; struct pci_saved_state *pci_saved_state; atomic_trefcnt; + struct eventfd_ctx *err_trigger; }; #define is_intx(vdev) (vdev-irq_type == VFIO_PCI_INTX_IRQ_INDEX) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 56097c6..5ed5a54 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -693,6 +693,14 @@ void *vfio_del_group_dev(struct device *dev) } EXPORT_SYMBOL_GPL(vfio_del_group_dev); +void *vfio_get_vdev(struct device *dev) +{ + struct vfio_device *device = dev_get_drvdata(dev); + + return
Re: [PATCH 1/2] VFIO-AER: Vfio-pci driver changes for supporting AER
On Fri, 2013-01-11 at 08:45 +, Pandarathil, Vijaymohan R wrote: -Original Message- From: Alex Williamson [mailto:alex.william...@redhat.com] Sent: Wednesday, January 09, 2013 11:05 AM To: Pandarathil, Vijaymohan R Cc: Bjorn Helgaas; Gleb Natapov; kvm@vger.kernel.org; qemu- de...@nongnu.org; linux-...@vger.kernel.org; linux-ker...@vger.kernel.org Subject: Re: [PATCH 1/2] VFIO-AER: Vfio-pci driver changes for supporting AER On Wed, 2013-01-09 at 10:52 -0700, Alex Williamson wrote: On Wed, 2013-01-09 at 06:26 +, Pandarathil, Vijaymohan R wrote: - New ioctl which is used to pass the eventfd that is signaled when an error occurs in the vfio_pci_device - Register pci_error_handler for the vfio_pci driver - When the device encounters an error, the error handler registered by the vfio_pci driver gets invoked by the AER infrastructure - In the error handler, signal the eventfd registered for the device. - This results in the qemu eventfd handler getting invoked and appropriate action taken for the guest. Signed-off-by: Vijay Mohan Pandarathil vijaymohan.pandarat...@hp.com --- drivers/vfio/pci/vfio_pci.c | 29 + drivers/vfio/pci/vfio_pci_private.h | 1 + drivers/vfio/vfio.c | 8 include/linux/vfio.h| 1 + include/uapi/linux/vfio.h | 9 + 5 files changed, 48 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6c11994..4ae9526 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -207,6 +207,8 @@ static long vfio_pci_ioctl(void *device_data, if (vdev-reset_works) info.flags |= VFIO_DEVICE_FLAGS_RESET; + info.flags |= VFIO_DEVICE_FLAGS_AER_NOTIFY; + This appears to be a PCI specific flag, so the name should include _PCI_. We also support non-PCIe devices and it seems like it would be possible to not have AER support available, so shouldn't this be conditional? Will do that. info.num_regions = VFIO_PCI_NUM_REGIONS; info.num_irqs = VFIO_PCI_NUM_IRQS; @@ -348,6 +350,19 @@ static long vfio_pci_ioctl(void *device_data, return ret; + } else if (cmd == VFIO_DEVICE_SET_ERRFD) { + int32_t fd = (int32_t)arg; + + if (fd 0) + return -EINVAL; + + vdev-err_trigger = eventfd_ctx_fdget(fd); + + if (IS_ERR(vdev-err_trigger)) + return PTR_ERR(vdev-err_trigger); + + return 0; + I'm not sure why we wouldn't describe this as just another interrupt from the device and configure it via SET_IRQ. This ioctl has very limited use and doesn't follow any of the conventions of all the other vfio ioctls. I thought this was a fairly simple ioctl to implement this way. Moreover, there is no device interrupt involved. Let me know if you really want to model it as a SET_IRQ ioctl. I'd like to see something else for sure. We're trying to do AER piecemeal but this ioctl leaves no room for anything else. Seems like a soon to be wasted ioctl number. Note that there isn't even a de-assign interface, nor a flag to set to indicate de-assignment. The SET_IRQ ioctl already handles these in a very similar way to how we'd want to handle it for error signaling. It doesn't seem like that much of a stretch to me to include it there, but I'd also entertain other ideas. Thanks, Alex } else if (cmd == VFIO_DEVICE_RESET) return vdev-reset_works ? pci_reset_function(vdev-pdev) : -EINVAL; @@ -527,11 +542,25 @@ static void vfio_pci_remove(struct pci_dev *pdev) kfree(vdev); } +static pci_ers_result_t vfio_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct vfio_pci_device *vdev = vfio_get_vdev(pdev-dev); + + eventfd_signal(vdev-err_trigger, 1); + return PCI_ERS_RESULT_CAN_RECOVER; +} What if err_trigger hasn't been set? Will fix that. + +static const struct pci_error_handlers vfio_err_handlers = { + .error_detected = vfio_err_detected, +}; + static struct pci_driver vfio_pci_driver = { .name = vfio-pci, .id_table = NULL, /* only dynamic ids */ .probe = vfio_pci_probe, .remove = vfio_pci_remove, + .err_handler= vfio_err_handlers, }; static void __exit vfio_pci_cleanup(void
Re: [PATCH 1/2] VFIO-AER: Vfio-pci driver changes for supporting AER
On Fri, Jan 11, 2013 at 08:46:35AM -0700, Alex Williamson wrote: On Fri, 2013-01-11 at 08:45 +, Pandarathil, Vijaymohan R wrote: -Original Message- From: Alex Williamson [mailto:alex.william...@redhat.com] Sent: Wednesday, January 09, 2013 11:05 AM To: Pandarathil, Vijaymohan R Cc: Bjorn Helgaas; Gleb Natapov; kvm@vger.kernel.org; qemu- de...@nongnu.org; linux-...@vger.kernel.org; linux-ker...@vger.kernel.org Subject: Re: [PATCH 1/2] VFIO-AER: Vfio-pci driver changes for supporting AER On Wed, 2013-01-09 at 10:52 -0700, Alex Williamson wrote: On Wed, 2013-01-09 at 06:26 +, Pandarathil, Vijaymohan R wrote: - New ioctl which is used to pass the eventfd that is signaled when an error occurs in the vfio_pci_device - Register pci_error_handler for the vfio_pci driver - When the device encounters an error, the error handler registered by the vfio_pci driver gets invoked by the AER infrastructure - In the error handler, signal the eventfd registered for the device. - This results in the qemu eventfd handler getting invoked and appropriate action taken for the guest. Signed-off-by: Vijay Mohan Pandarathil vijaymohan.pandarat...@hp.com --- drivers/vfio/pci/vfio_pci.c | 29 + drivers/vfio/pci/vfio_pci_private.h | 1 + drivers/vfio/vfio.c | 8 include/linux/vfio.h| 1 + include/uapi/linux/vfio.h | 9 + 5 files changed, 48 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6c11994..4ae9526 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -207,6 +207,8 @@ static long vfio_pci_ioctl(void *device_data, if (vdev-reset_works) info.flags |= VFIO_DEVICE_FLAGS_RESET; + info.flags |= VFIO_DEVICE_FLAGS_AER_NOTIFY; + This appears to be a PCI specific flag, so the name should include _PCI_. We also support non-PCIe devices and it seems like it would be possible to not have AER support available, so shouldn't this be conditional? Will do that. info.num_regions = VFIO_PCI_NUM_REGIONS; info.num_irqs = VFIO_PCI_NUM_IRQS; @@ -348,6 +350,19 @@ static long vfio_pci_ioctl(void *device_data, return ret; + } else if (cmd == VFIO_DEVICE_SET_ERRFD) { + int32_t fd = (int32_t)arg; + + if (fd 0) + return -EINVAL; + + vdev-err_trigger = eventfd_ctx_fdget(fd); + + if (IS_ERR(vdev-err_trigger)) + return PTR_ERR(vdev-err_trigger); + + return 0; + I'm not sure why we wouldn't describe this as just another interrupt from the device and configure it via SET_IRQ. This ioctl has very limited use and doesn't follow any of the conventions of all the other vfio ioctls. I thought this was a fairly simple ioctl to implement this way. Moreover, there is no device interrupt involved. Let me know if you really want to model it as a SET_IRQ ioctl. I'd like to see something else for sure. We're trying to do AER piecemeal but this ioctl leaves no room for anything else. Seems like a soon to be wasted ioctl number. Note that there isn't even a de-assign interface, nor a flag to set to indicate de-assignment. The SET_IRQ ioctl already handles these in a very similar way to how we'd want to handle it for error signaling. It doesn't seem like that much of a stretch to me to include it there, but I'd also entertain other ideas. Thanks, Alex Yes it would be good to have a picture of the dependencies necessary to implement passthrough, before introducing an incompatible interface. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] VFIO-AER: Vfio-pci driver changes for supporting AER
On Wed, 2013-01-09 at 06:26 +, Pandarathil, Vijaymohan R wrote: - New ioctl which is used to pass the eventfd that is signaled when an error occurs in the vfio_pci_device - Register pci_error_handler for the vfio_pci driver - When the device encounters an error, the error handler registered by the vfio_pci driver gets invoked by the AER infrastructure - In the error handler, signal the eventfd registered for the device. - This results in the qemu eventfd handler getting invoked and appropriate action taken for the guest. Signed-off-by: Vijay Mohan Pandarathil vijaymohan.pandarat...@hp.com --- drivers/vfio/pci/vfio_pci.c | 29 + drivers/vfio/pci/vfio_pci_private.h | 1 + drivers/vfio/vfio.c | 8 include/linux/vfio.h| 1 + include/uapi/linux/vfio.h | 9 + 5 files changed, 48 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6c11994..4ae9526 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -207,6 +207,8 @@ static long vfio_pci_ioctl(void *device_data, if (vdev-reset_works) info.flags |= VFIO_DEVICE_FLAGS_RESET; + info.flags |= VFIO_DEVICE_FLAGS_AER_NOTIFY; + This appears to be a PCI specific flag, so the name should include _PCI_. We also support non-PCIe devices and it seems like it would be possible to not have AER support available, so shouldn't this be conditional? info.num_regions = VFIO_PCI_NUM_REGIONS; info.num_irqs = VFIO_PCI_NUM_IRQS; @@ -348,6 +350,19 @@ static long vfio_pci_ioctl(void *device_data, return ret; + } else if (cmd == VFIO_DEVICE_SET_ERRFD) { + int32_t fd = (int32_t)arg; + + if (fd 0) + return -EINVAL; + + vdev-err_trigger = eventfd_ctx_fdget(fd); + + if (IS_ERR(vdev-err_trigger)) + return PTR_ERR(vdev-err_trigger); + + return 0; + I'm not sure why we wouldn't describe this as just another interrupt from the device and configure it via SET_IRQ. This ioctl has very limited use and doesn't follow any of the conventions of all the other vfio ioctls. } else if (cmd == VFIO_DEVICE_RESET) return vdev-reset_works ? pci_reset_function(vdev-pdev) : -EINVAL; @@ -527,11 +542,25 @@ static void vfio_pci_remove(struct pci_dev *pdev) kfree(vdev); } +static pci_ers_result_t vfio_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct vfio_pci_device *vdev = vfio_get_vdev(pdev-dev); + + eventfd_signal(vdev-err_trigger, 1); + return PCI_ERS_RESULT_CAN_RECOVER; +} What if err_trigger hasn't been set? + +static const struct pci_error_handlers vfio_err_handlers = { + .error_detected = vfio_err_detected, +}; + static struct pci_driver vfio_pci_driver = { .name = vfio-pci, .id_table = NULL, /* only dynamic ids */ .probe = vfio_pci_probe, .remove = vfio_pci_remove, + .err_handler= vfio_err_handlers, }; static void __exit vfio_pci_cleanup(void) diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 611827c..daee62f 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -55,6 +55,7 @@ struct vfio_pci_device { boolbardirty; struct pci_saved_state *pci_saved_state; atomic_trefcnt; + struct eventfd_ctx *err_trigger; }; #define is_intx(vdev) (vdev-irq_type == VFIO_PCI_INTX_IRQ_INDEX) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 56097c6..5ed5a54 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -693,6 +693,14 @@ void *vfio_del_group_dev(struct device *dev) } EXPORT_SYMBOL_GPL(vfio_del_group_dev); +void *vfio_get_vdev(struct device *dev) +{ + struct vfio_device *device = dev_get_drvdata(dev); + + return device-device_data; +} +EXPORT_SYMBOL_GPL(vfio_get_vdev); + This is unsafe. How do we know dev is a vfio device? How do we keep that drvdata valid while you're using it? I think you want to export the existing vfio_group_get_device() and vfio_device_put(). Thanks, Alex /** * VFIO base fd, /dev/vfio/vfio */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ab9e862..3c97b03 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -45,6 +45,7 @@ extern int vfio_add_group_dev(struct device *dev, void *device_data); extern void *vfio_del_group_dev(struct device *dev); +extern void *vfio_get_vdev(struct device *dev); /** *
Re: [PATCH 1/2] VFIO-AER: Vfio-pci driver changes for supporting AER
On Wed, 2013-01-09 at 10:52 -0700, Alex Williamson wrote: On Wed, 2013-01-09 at 06:26 +, Pandarathil, Vijaymohan R wrote: - New ioctl which is used to pass the eventfd that is signaled when an error occurs in the vfio_pci_device - Register pci_error_handler for the vfio_pci driver - When the device encounters an error, the error handler registered by the vfio_pci driver gets invoked by the AER infrastructure - In the error handler, signal the eventfd registered for the device. - This results in the qemu eventfd handler getting invoked and appropriate action taken for the guest. Signed-off-by: Vijay Mohan Pandarathil vijaymohan.pandarat...@hp.com --- drivers/vfio/pci/vfio_pci.c | 29 + drivers/vfio/pci/vfio_pci_private.h | 1 + drivers/vfio/vfio.c | 8 include/linux/vfio.h| 1 + include/uapi/linux/vfio.h | 9 + 5 files changed, 48 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6c11994..4ae9526 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -207,6 +207,8 @@ static long vfio_pci_ioctl(void *device_data, if (vdev-reset_works) info.flags |= VFIO_DEVICE_FLAGS_RESET; + info.flags |= VFIO_DEVICE_FLAGS_AER_NOTIFY; + This appears to be a PCI specific flag, so the name should include _PCI_. We also support non-PCIe devices and it seems like it would be possible to not have AER support available, so shouldn't this be conditional? info.num_regions = VFIO_PCI_NUM_REGIONS; info.num_irqs = VFIO_PCI_NUM_IRQS; @@ -348,6 +350,19 @@ static long vfio_pci_ioctl(void *device_data, return ret; + } else if (cmd == VFIO_DEVICE_SET_ERRFD) { + int32_t fd = (int32_t)arg; + + if (fd 0) + return -EINVAL; + + vdev-err_trigger = eventfd_ctx_fdget(fd); + + if (IS_ERR(vdev-err_trigger)) + return PTR_ERR(vdev-err_trigger); + + return 0; + I'm not sure why we wouldn't describe this as just another interrupt from the device and configure it via SET_IRQ. This ioctl has very limited use and doesn't follow any of the conventions of all the other vfio ioctls. } else if (cmd == VFIO_DEVICE_RESET) return vdev-reset_works ? pci_reset_function(vdev-pdev) : -EINVAL; @@ -527,11 +542,25 @@ static void vfio_pci_remove(struct pci_dev *pdev) kfree(vdev); } +static pci_ers_result_t vfio_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct vfio_pci_device *vdev = vfio_get_vdev(pdev-dev); + + eventfd_signal(vdev-err_trigger, 1); + return PCI_ERS_RESULT_CAN_RECOVER; +} What if err_trigger hasn't been set? + +static const struct pci_error_handlers vfio_err_handlers = { + .error_detected = vfio_err_detected, +}; + static struct pci_driver vfio_pci_driver = { .name = vfio-pci, .id_table = NULL, /* only dynamic ids */ .probe = vfio_pci_probe, .remove = vfio_pci_remove, + .err_handler= vfio_err_handlers, }; static void __exit vfio_pci_cleanup(void) diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 611827c..daee62f 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -55,6 +55,7 @@ struct vfio_pci_device { boolbardirty; struct pci_saved_state *pci_saved_state; atomic_trefcnt; + struct eventfd_ctx *err_trigger; }; #define is_intx(vdev) (vdev-irq_type == VFIO_PCI_INTX_IRQ_INDEX) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 56097c6..5ed5a54 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -693,6 +693,14 @@ void *vfio_del_group_dev(struct device *dev) } EXPORT_SYMBOL_GPL(vfio_del_group_dev); +void *vfio_get_vdev(struct device *dev) +{ + struct vfio_device *device = dev_get_drvdata(dev); + + return device-device_data; +} +EXPORT_SYMBOL_GPL(vfio_get_vdev); + This is unsafe. How do we know dev is a vfio device? How do we keep that drvdata valid while you're using it? I think you want to export the existing vfio_group_get_device() and vfio_device_put(). Thanks, vfio_group_get_device() isn't quite what you want either since it assumes a reference count on the group. You'll want something like vfio_add_group_dev() or vfio_dev_present(), perhaps moving the iommu_group_get - vfio_group_get_from_iommu - vfio_group_get_device into a helper function. Thanks, Alex /** * VFIO base fd,