>-----Original Message-----
>From: Cédric Le Goater <c...@redhat.com>
>Sent: Friday, November 10, 2023 5:34 PM
>Subject: Re: [PATCH v5 03/20] vfio/iommufd: Implement the iommufd backend
>
>On 11/9/23 12:45, Zhenzhong Duan wrote:
>> From: Yi Liu <yi.l....@intel.com>
>>
>> Add the iommufd backend. The IOMMUFD container class is implemented
>> based on the new /dev/iommu user API. This backend obviously depends
>> on CONFIG_IOMMUFD.
>>
>> So far, the iommufd backend doesn't support dirty page sync yet due
>> to missing support in the host kernel.
>>
>> Co-authored-by: Eric Auger <eric.au...@redhat.com>
>> Signed-off-by: Yi Liu <yi.l....@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
>> ---
>> v5: Switch to IOAS attach/detach and hide hwpt
>>
>>   include/hw/vfio/vfio-common.h |  11 +
>>   hw/vfio/common.c              |  20 +-
>>   hw/vfio/iommufd.c             | 429 ++++++++++++++++++++++++++++++++++
>>   hw/vfio/meson.build           |   3 +
>>   hw/vfio/trace-events          |  10 +
>>   5 files changed, 469 insertions(+), 4 deletions(-)
>>   create mode 100644 hw/vfio/iommufd.c
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 24ecc0e7ee..3dac5c167e 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -89,6 +89,14 @@ typedef struct VFIOHostDMAWindow {
>>       QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
>>   } VFIOHostDMAWindow;
>>
>> +typedef struct IOMMUFDBackend IOMMUFDBackend;
>> +
>> +typedef struct VFIOIOMMUFDContainer {
>> +    VFIOContainerBase bcontainer;
>> +    IOMMUFDBackend *be;
>> +    uint32_t ioas_id;
>> +} VFIOIOMMUFDContainer;
>> +
>>   typedef struct VFIODeviceOps VFIODeviceOps;
>>
>>   typedef struct VFIODevice {
>> @@ -116,6 +124,8 @@ typedef struct VFIODevice {
>>       OnOffAuto pre_copy_dirty_page_tracking;
>>       bool dirty_pages_supported;
>>       bool dirty_tracking;
>> +    int devid;
>> +    IOMMUFDBackend *iommufd;
>>   } VFIODevice;
>>
>>   struct VFIODeviceOps {
>> @@ -201,6 +211,7 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice)
>VFIODeviceList;
>>   extern VFIOGroupList vfio_group_list;
>>   extern VFIODeviceList vfio_device_list;
>>   extern const VFIOIOMMUOps vfio_legacy_ops;
>> +extern const VFIOIOMMUOps vfio_iommufd_ops;
>>   extern const MemoryListener vfio_memory_listener;
>>   extern int vfio_kvm_device_fd;
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 572ae7c934..3b7e11158f 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -19,6 +19,7 @@
>>    */
>>
>>   #include "qemu/osdep.h"
>> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>>   #include <sys/ioctl.h>
>>   #ifdef CONFIG_KVM
>>   #include <linux/kvm.h>
>> @@ -1462,10 +1463,13 @@ VFIOAddressSpace
>*vfio_get_address_space(AddressSpace *as)
>>
>>   void vfio_put_address_space(VFIOAddressSpace *space)
>>   {
>> -    if (QLIST_EMPTY(&space->containers)) {
>> -        QLIST_REMOVE(space, list);
>> -        g_free(space);
>> +    if (!QLIST_EMPTY(&space->containers)) {
>> +        return;
>
>I think this change deserves to be in a separate patch, even if simple.
>Is there some relation with iommufd ? This is not clear.

OK, will do. It's unrelated to iommufd, just avoid unnecessary check below. 

>
>>       }
>> +
>> +    QLIST_REMOVE(space, list);
>> +    g_free(space);
>> +
>>       if (QLIST_EMPTY(&vfio_address_spaces)) {
>>           qemu_unregister_reset(vfio_reset_handler, NULL);
>>       }
>> @@ -1498,8 +1502,16 @@ retry:
>>   int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>                          AddressSpace *as, Error **errp)
>>   {
>> -    const VFIOIOMMUOps *ops = &vfio_legacy_ops;
>> +    const VFIOIOMMUOps *ops;
>>
>> +#ifdef CONFIG_IOMMUFD
>> +    if (vbasedev->iommufd) {
>> +        ops = &vfio_iommufd_ops;
>> +    } else
>> +#endif
>> +    {
>> +        ops = &vfio_legacy_ops;
>> +    }
>
>Simply adding :
>
>  +#ifdef CONFIG_IOMMUFD
>  +    if (vbasedev->iommufd) {
>  +        ops = &vfio_iommufd_ops;
>  +    }
>  +#endif
>
>would have the same effect with less change.

Indeed, will do.

>
>That said, it would also be nice to find a way to avoid the use of
>CONFIG_IOMMUFD in hw/vfio/common.c. May be with a helper returning
>'const VFIOIOMMUOps *'. This is minor. Still, I find some redundancy
>with vfio_container_init() and I don't a good alternative yet :)

Sure, will do, guess you mean a helper function in hw/vfio/helpers.c with
CONFIG_IOMMUFD check?

>
>
>>       return ops->attach_device(name, vbasedev, as, errp);
>>   }
>>
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> new file mode 100644
>> index 0000000000..ea4e23f4ec
>> --- /dev/null
>> +++ b/hw/vfio/iommufd.c
>> @@ -0,0 +1,429 @@
>> +/*
>> + * iommufd container backend
>> + *
>> + * Copyright (C) 2023 Intel Corporation.
>> + * Copyright Red Hat, Inc. 2023
>> + *
>> + * Authors: Yi Liu <yi.l....@intel.com>
>> + *          Eric Auger <eric.au...@redhat.com>
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include <sys/ioctl.h>
>> +#include <linux/vfio.h>
>> +#include <linux/iommufd.h>
>> +
>> +#include "hw/vfio/vfio-common.h"
>> +#include "qemu/error-report.h"
>> +#include "trace.h"
>> +#include "qapi/error.h"
>> +#include "sysemu/iommufd.h"
>> +#include "hw/qdev-core.h"
>> +#include "sysemu/reset.h"
>> +#include "qemu/cutils.h"
>> +#include "qemu/chardev_open.h"
>> +
>> +static int iommufd_map(VFIOContainerBase *bcontainer, hwaddr iova,
>> +                       ram_addr_t size, void *vaddr, bool readonly)
>> +{
>> +    VFIOIOMMUFDContainer *container =
>> +        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
>> +
>> +    return iommufd_backend_map_dma(container->be,
>> +                                   container->ioas_id,
>> +                                   iova, size, vaddr, readonly);
>> +}
>> +
>> +static int iommufd_unmap(VFIOContainerBase *bcontainer,
>> +                         hwaddr iova, ram_addr_t size,
>> +                         IOMMUTLBEntry *iotlb)
>> +{
>> +    VFIOIOMMUFDContainer *container =
>> +        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
>> +
>> +    /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
>> +    return iommufd_backend_unmap_dma(container->be,
>> +                                     container->ioas_id, iova, size);
>> +}
>> +
>> +static void iommufd_cdev_kvm_device_add(VFIODevice *vbasedev)
>> +{
>> +    Error *err = NULL;
>> +
>> +    if (vfio_kvm_device_add_fd(vbasedev->fd, &err)) {
>> +        error_report_err(err);
>
>We should propagate this error to the callers instead.

This is to follow legacy backend where the error doesn't treated as
a serious issue.

>
>> +    }
>> +}
>> +
>> +static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev)
>> +{
>> +    Error *err = NULL;
>> +
>> +    if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) {
>> +        error_report_err(err);
>
>Possibly this one also but It might be more complex. Let's keep it that
>way.
>
>> +    }
>> +}
>> +
>> +static int iommufd_connect_and_bind(VFIODevice *vbasedev, Error **errp)
>> +{
>> +    IOMMUFDBackend *iommufd = vbasedev->iommufd;
>> +    struct vfio_device_bind_iommufd bind = {
>> +        .argsz = sizeof(bind),
>> +        .flags = 0,
>> +    };
>> +    int ret;
>> +
>> +    ret = iommufd_backend_connect(iommufd, errp);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +
>> +    /*
>> +     * Add device to kvm-vfio to be prepared for the tracking
>> +     * in KVM. Especially for some emulated devices, it requires
>> +     * to have kvm information in the device open.
>> +     */
>> +    iommufd_cdev_kvm_device_add(vbasedev);
>
>We shoud return a possible error.

This is to follow legacy backend where this error is reported and ignored.
Do we need a difference for iommufd BE?

>
>> +
>> +    /* Bind device to iommufd */
>> +    bind.iommufd = iommufd->fd;
>> +    ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind);
>> +    if (ret) {
>> +        error_setg_errno(errp, errno, "error bind device fd=%d to 
>> iommufd=%d",
>> +                         vbasedev->fd, bind.iommufd);
>> +        goto err_bind;
>> +    }
>> +
>> +    vbasedev->devid = bind.out_devid;
>> +    trace_iommufd_connect_and_bind(bind.iommufd, vbasedev->name,
>vbasedev->fd,
>> +                                   vbasedev->devid);
>> +    return ret;
>> +err_bind:
>> +    iommufd_cdev_kvm_device_del(vbasedev);
>> +    iommufd_backend_disconnect(iommufd);
>
>These two calls look like iommufd_unbind_and_disconnect() below.

Yes, they are same as iommufd doesn't support explicit device unbind.
But it looks strange to call iommufd_unbind_and_disconnect in
iommufd_connect_and_bind.

>
>> +    return ret;
>> +}
>> +
>> +static void iommufd_unbind_and_disconnect(VFIODevice *vbasedev)
>> +{
>> +    /* Unbind is automatically conducted when device fd is closed */
>> +    iommufd_cdev_kvm_device_del(vbasedev);
>> +    iommufd_backend_disconnect(vbasedev->iommufd);
>> +}
>> +
>> +static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>> +{
>> +    long int ret = -ENOTTY;
>> +    char *path, *vfio_dev_path = NULL, *vfio_path = NULL;
>> +    DIR *dir = NULL;
>> +    struct dirent *dent;
>> +    gchar *contents;
>> +    struct stat st;
>> +    gsize length;
>> +    int major, minor;
>> +    dev_t vfio_devt;
>> +
>> +    path = g_strdup_printf("%s/vfio-dev", sysfs_path);
>> +    if (stat(path, &st) < 0) {
>> +        error_setg_errno(errp, errno, "no such host device");
>> +        goto out_free_path;
>> +    }
>> +
>> +    dir = opendir(path);
>> +    if (!dir) {
>> +        error_setg_errno(errp, errno, "couldn't open dirrectory %s", path);
>
>
>directory

Good catch, will fix.

Thanks
Zhenzhong

Reply via email to