>-----Original Message-----
>From: Eric Auger <eric.au...@redhat.com>
>Sent: Friday, November 17, 2023 9:54 PM
>Subject: Re: [PATCH v6 09/21] vfio/iommufd: Enable pci hot reset through
>iommufd cdev interface
>
>
>
>On 11/14/23 11:09, Zhenzhong Duan wrote:
>> Add a new callback iommufd_cdev_pci_hot_reset to do iommufd specific
>> check and reset operation.
>
>nit: Implement the newly introduced pci_hot_reset callback?

Yes

>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
>> ---
>> v6: pci_hot_reset return -errno if fails
>>
>>  hw/vfio/iommufd.c    | 145
>+++++++++++++++++++++++++++++++++++++++++++
>>  hw/vfio/trace-events |   1 +
>>  2 files changed, 146 insertions(+)
>>
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index e5bf528e89..3eec428162 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -24,6 +24,7 @@
>>  #include "sysemu/reset.h"
>>  #include "qemu/cutils.h"
>>  #include "qemu/chardev_open.h"
>> +#include "pci.h"
>>
>>  static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova,
>>                              ram_addr_t size, void *vaddr, bool readonly)
>> @@ -473,9 +474,153 @@ static void iommufd_cdev_detach(VFIODevice
>*vbasedev)
>>      close(vbasedev->fd);
>>  }
>>
>> +static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
>> +{
>> +    VFIODevice *vbasedev_iter;
>> +
>> +    QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) {
>> +        if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) {
>> +            continue;
>> +        }
>> +        if (devid == vbasedev_iter->devid) {
>> +            return vbasedev_iter;
>> +        }
>> +    }
>> +    return NULL;
>> +}
>> +
>> +static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single)
>> +{
>> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>> +    struct vfio_pci_hot_reset_info *info = NULL;
>> +    struct vfio_pci_dependent_device *devices;
>> +    struct vfio_pci_hot_reset *reset;
>> +    int ret, i;
>> +    bool multi = false;
>> +
>> +    trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
>> +
>> +    if (!single) {
>> +        vfio_pci_pre_reset(vdev);
>> +    }
>> +    vdev->vbasedev.needs_reset = false;
>> +
>> +    ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
>> +
>> +    if (ret) {
>> +        goto out_single;
>> +    }
>> +
>> +    assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID);
>> +
>> +    devices = &info->devices[0];
>> +
>> +    if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) {
>> +        if (!vdev->has_pm_reset) {
>> +            for (i = 0; i < info->count; i++) {
>> +                if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) {
>> +                    error_report("vfio: Cannot reset device %s, "
>> +                                 "depends on device %04x:%02x:%02x.%x "
>> +                                 "which is not owned.",
>> +                                 vdev->vbasedev.name, devices[i].segment,
>> +                                 devices[i].bus, PCI_SLOT(devices[i].devfn),
>> +                                 PCI_FUNC(devices[i].devfn));
>> +                }
>> +            }
>> +        }
>> +        ret = -EPERM;
>> +        goto out_single;
>> +    }
>> +
>> +    trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
>> +
>> +    for (i = 0; i < info->count; i++) {
>> +        VFIOPCIDevice *tmp;
>> +        VFIODevice *vbasedev_iter;
>> +
>> +        trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment,
>> +                                                     devices[i].bus,
>> +                                                     
>> PCI_SLOT(devices[i].devfn),
>> +                                                     
>> PCI_FUNC(devices[i].devfn),
>> +                                                     devices[i].devid);
>> +
>> +        /*
>> +         * If a VFIO cdev device is resettable, all the dependent devices
>> +         * are either bound to same iommufd or within same iommu_groups as
>> +         * one of the iommufd bound devices.
>> +         */
>> +        assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED);
>> +
>> +        if (devices[i].devid == vdev->vbasedev.devid ||
>> +            devices[i].devid == VFIO_PCI_DEVID_OWNED) {
>> +            continue;
>> +        }
>> +
>> +        vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid);
>> +        if (!vbasedev_iter || !vbasedev_iter->dev->realized ||
>> +            vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
>> +            continue;
>> +        }
>> +        tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
>> +        if (single) {
>> +            ret = -EINVAL;
>> +            goto out_single;
>> +        }
>> +        vfio_pci_pre_reset(tmp);
>> +        tmp->vbasedev.needs_reset = false;
>> +        multi = true;
>> +    }
>> +
>> +    if (!single && !multi) {
>> +        ret = -EINVAL;
>> +        goto out_single;
>> +    }
>> +
>> +    /* Use zero length array for hot reset with iommufd backend */
>> +    reset = g_malloc0(sizeof(*reset));
>> +    reset->argsz = sizeof(*reset);
>> +
>> +     /* Bus reset! */
>> +    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
>> +    g_free(reset);
>> +    if (ret) {
>> +        ret = -errno;
>> +    }
>> +
>> +    trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
>> +                                    ret ? strerror(errno) : "Success");
>> +
>> +    /* Re-enable INTx on affected devices */
>> +    for (i = 0; i < info->count; i++) {
>> +        VFIOPCIDevice *tmp;
>> +        VFIODevice *vbasedev_iter;
>> +
>> +        if (devices[i].devid == vdev->vbasedev.devid ||
>> +            devices[i].devid == VFIO_PCI_DEVID_OWNED) {
>> +            continue;
>> +        }
>> +
>> +        vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid);
>> +        if (!vbasedev_iter || !vbasedev_iter->dev->realized ||
>> +            vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
>> +            continue;
>> +        }
>> +        tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
>nit: I see this block of code also is used above for the pre_reset. May
>be interesting to introduce an helper? Could be done later though

Will do in v7.

Thanks
Zhenzhong

>> +        vfio_pci_post_reset(tmp);
>> +    }
>> +out_single:
>> +    if (!single) {
>> +        vfio_pci_post_reset(vdev);
>> +    }
>> +    g_free(info);
>> +
>> +    return ret;
>> +}
>> +
>>  const VFIOIOMMUOps vfio_iommufd_ops = {
>>      .dma_map = iommufd_cdev_map,
>>      .dma_unmap = iommufd_cdev_unmap,
>>      .attach_device = iommufd_cdev_attach,
>>      .detach_device = iommufd_cdev_detach,
>> +    .pci_hot_reset = iommufd_cdev_pci_hot_reset,
>>  };
>> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
>> index 5d3e9e8cee..d838232d5a 100644
>> --- a/hw/vfio/trace-events
>> +++ b/hw/vfio/trace-events
>> @@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const
>char *name, const char *str, in
>>  iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
>>  iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new
>IOMMUFD container with ioasid=%d"
>>  iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int
>num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
>> +iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int
>function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
>Otherwise looks good to me.
>
>Reviewed-by: Eric Auger <eric.au...@redhat.com>
>
>
>Eric
>
>

Reply via email to