>-----Original Message----- >From: Cédric Le Goater <c...@redhat.com> >Sent: Friday, November 10, 2023 5:34 PM >Subject: Re: [PATCH v5 03/20] vfio/iommufd: Implement the iommufd backend > >On 11/9/23 12:45, Zhenzhong Duan wrote: >> From: Yi Liu <yi.l....@intel.com> >> >> Add the iommufd backend. The IOMMUFD container class is implemented >> based on the new /dev/iommu user API. This backend obviously depends >> on CONFIG_IOMMUFD. >> >> So far, the iommufd backend doesn't support dirty page sync yet due >> to missing support in the host kernel. >> >> Co-authored-by: Eric Auger <eric.au...@redhat.com> >> Signed-off-by: Yi Liu <yi.l....@intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> >> --- >> v5: Switch to IOAS attach/detach and hide hwpt >> >> include/hw/vfio/vfio-common.h | 11 + >> hw/vfio/common.c | 20 +- >> hw/vfio/iommufd.c | 429 ++++++++++++++++++++++++++++++++++ >> hw/vfio/meson.build | 3 + >> hw/vfio/trace-events | 10 + >> 5 files changed, 469 insertions(+), 4 deletions(-) >> create mode 100644 hw/vfio/iommufd.c >> >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index 24ecc0e7ee..3dac5c167e 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -89,6 +89,14 @@ typedef struct VFIOHostDMAWindow { >> QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next; >> } VFIOHostDMAWindow; >> >> +typedef struct IOMMUFDBackend IOMMUFDBackend; >> + >> +typedef struct VFIOIOMMUFDContainer { >> + VFIOContainerBase bcontainer; >> + IOMMUFDBackend *be; >> + uint32_t ioas_id; >> +} VFIOIOMMUFDContainer; >> + >> typedef struct VFIODeviceOps VFIODeviceOps; >> >> typedef struct VFIODevice { >> @@ -116,6 +124,8 @@ typedef struct VFIODevice { >> OnOffAuto pre_copy_dirty_page_tracking; >> bool dirty_pages_supported; >> bool dirty_tracking; >> + int devid; >> + IOMMUFDBackend *iommufd; >> } VFIODevice; >> >> struct VFIODeviceOps { >> @@ -201,6 +211,7 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice) >VFIODeviceList; >> extern VFIOGroupList vfio_group_list; >> extern VFIODeviceList vfio_device_list; >> extern const VFIOIOMMUOps vfio_legacy_ops; >> +extern const VFIOIOMMUOps vfio_iommufd_ops; >> extern const MemoryListener vfio_memory_listener; >> extern int vfio_kvm_device_fd; >> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index 572ae7c934..3b7e11158f 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -19,6 +19,7 @@ >> */ >> >> #include "qemu/osdep.h" >> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ >> #include <sys/ioctl.h> >> #ifdef CONFIG_KVM >> #include <linux/kvm.h> >> @@ -1462,10 +1463,13 @@ VFIOAddressSpace >*vfio_get_address_space(AddressSpace *as) >> >> void vfio_put_address_space(VFIOAddressSpace *space) >> { >> - if (QLIST_EMPTY(&space->containers)) { >> - QLIST_REMOVE(space, list); >> - g_free(space); >> + if (!QLIST_EMPTY(&space->containers)) { >> + return; > >I think this change deserves to be in a separate patch, even if simple. >Is there some relation with iommufd ? This is not clear.
OK, will do. It's unrelated to iommufd, just avoid unnecessary check below. > >> } >> + >> + QLIST_REMOVE(space, list); >> + g_free(space); >> + >> if (QLIST_EMPTY(&vfio_address_spaces)) { >> qemu_unregister_reset(vfio_reset_handler, NULL); >> } >> @@ -1498,8 +1502,16 @@ retry: >> int vfio_attach_device(char *name, VFIODevice *vbasedev, >> AddressSpace *as, Error **errp) >> { >> - const VFIOIOMMUOps *ops = &vfio_legacy_ops; >> + const VFIOIOMMUOps *ops; >> >> +#ifdef CONFIG_IOMMUFD >> + if (vbasedev->iommufd) { >> + ops = &vfio_iommufd_ops; >> + } else >> +#endif >> + { >> + ops = &vfio_legacy_ops; >> + } > >Simply adding : > > +#ifdef CONFIG_IOMMUFD > + if (vbasedev->iommufd) { > + ops = &vfio_iommufd_ops; > + } > +#endif > >would have the same effect with less change. Indeed, will do. > >That said, it would also be nice to find a way to avoid the use of >CONFIG_IOMMUFD in hw/vfio/common.c. May be with a helper returning >'const VFIOIOMMUOps *'. This is minor. Still, I find some redundancy >with vfio_container_init() and I don't a good alternative yet :) Sure, will do, guess you mean a helper function in hw/vfio/helpers.c with CONFIG_IOMMUFD check? > > >> return ops->attach_device(name, vbasedev, as, errp); >> } >> >> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c >> new file mode 100644 >> index 0000000000..ea4e23f4ec >> --- /dev/null >> +++ b/hw/vfio/iommufd.c >> @@ -0,0 +1,429 @@ >> +/* >> + * iommufd container backend >> + * >> + * Copyright (C) 2023 Intel Corporation. >> + * Copyright Red Hat, Inc. 2023 >> + * >> + * Authors: Yi Liu <yi.l....@intel.com> >> + * Eric Auger <eric.au...@redhat.com> >> + * >> + * SPDX-License-Identifier: GPL-2.0-or-later >> + */ >> + >> +#include "qemu/osdep.h" >> +#include <sys/ioctl.h> >> +#include <linux/vfio.h> >> +#include <linux/iommufd.h> >> + >> +#include "hw/vfio/vfio-common.h" >> +#include "qemu/error-report.h" >> +#include "trace.h" >> +#include "qapi/error.h" >> +#include "sysemu/iommufd.h" >> +#include "hw/qdev-core.h" >> +#include "sysemu/reset.h" >> +#include "qemu/cutils.h" >> +#include "qemu/chardev_open.h" >> + >> +static int iommufd_map(VFIOContainerBase *bcontainer, hwaddr iova, >> + ram_addr_t size, void *vaddr, bool readonly) >> +{ >> + VFIOIOMMUFDContainer *container = >> + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); >> + >> + return iommufd_backend_map_dma(container->be, >> + container->ioas_id, >> + iova, size, vaddr, readonly); >> +} >> + >> +static int iommufd_unmap(VFIOContainerBase *bcontainer, >> + hwaddr iova, ram_addr_t size, >> + IOMMUTLBEntry *iotlb) >> +{ >> + VFIOIOMMUFDContainer *container = >> + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); >> + >> + /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ >> + return iommufd_backend_unmap_dma(container->be, >> + container->ioas_id, iova, size); >> +} >> + >> +static void iommufd_cdev_kvm_device_add(VFIODevice *vbasedev) >> +{ >> + Error *err = NULL; >> + >> + if (vfio_kvm_device_add_fd(vbasedev->fd, &err)) { >> + error_report_err(err); > >We should propagate this error to the callers instead. This is to follow legacy backend where the error doesn't treated as a serious issue. > >> + } >> +} >> + >> +static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev) >> +{ >> + Error *err = NULL; >> + >> + if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) { >> + error_report_err(err); > >Possibly this one also but It might be more complex. Let's keep it that >way. > >> + } >> +} >> + >> +static int iommufd_connect_and_bind(VFIODevice *vbasedev, Error **errp) >> +{ >> + IOMMUFDBackend *iommufd = vbasedev->iommufd; >> + struct vfio_device_bind_iommufd bind = { >> + .argsz = sizeof(bind), >> + .flags = 0, >> + }; >> + int ret; >> + >> + ret = iommufd_backend_connect(iommufd, errp); >> + if (ret) { >> + return ret; >> + } >> + >> + /* >> + * Add device to kvm-vfio to be prepared for the tracking >> + * in KVM. Especially for some emulated devices, it requires >> + * to have kvm information in the device open. >> + */ >> + iommufd_cdev_kvm_device_add(vbasedev); > >We shoud return a possible error. This is to follow legacy backend where this error is reported and ignored. Do we need a difference for iommufd BE? > >> + >> + /* Bind device to iommufd */ >> + bind.iommufd = iommufd->fd; >> + ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind); >> + if (ret) { >> + error_setg_errno(errp, errno, "error bind device fd=%d to >> iommufd=%d", >> + vbasedev->fd, bind.iommufd); >> + goto err_bind; >> + } >> + >> + vbasedev->devid = bind.out_devid; >> + trace_iommufd_connect_and_bind(bind.iommufd, vbasedev->name, >vbasedev->fd, >> + vbasedev->devid); >> + return ret; >> +err_bind: >> + iommufd_cdev_kvm_device_del(vbasedev); >> + iommufd_backend_disconnect(iommufd); > >These two calls look like iommufd_unbind_and_disconnect() below. Yes, they are same as iommufd doesn't support explicit device unbind. But it looks strange to call iommufd_unbind_and_disconnect in iommufd_connect_and_bind. > >> + return ret; >> +} >> + >> +static void iommufd_unbind_and_disconnect(VFIODevice *vbasedev) >> +{ >> + /* Unbind is automatically conducted when device fd is closed */ >> + iommufd_cdev_kvm_device_del(vbasedev); >> + iommufd_backend_disconnect(vbasedev->iommufd); >> +} >> + >> +static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) >> +{ >> + long int ret = -ENOTTY; >> + char *path, *vfio_dev_path = NULL, *vfio_path = NULL; >> + DIR *dir = NULL; >> + struct dirent *dent; >> + gchar *contents; >> + struct stat st; >> + gsize length; >> + int major, minor; >> + dev_t vfio_devt; >> + >> + path = g_strdup_printf("%s/vfio-dev", sysfs_path); >> + if (stat(path, &st) < 0) { >> + error_setg_errno(errp, errno, "no such host device"); >> + goto out_free_path; >> + } >> + >> + dir = opendir(path); >> + if (!dir) { >> + error_setg_errno(errp, errno, "couldn't open dirrectory %s", path); > > >directory Good catch, will fix. Thanks Zhenzhong