On 9/27/23 14:32, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.au...@redhat.com>
>> Sent: Wednesday, September 27, 2023 8:23 PM
>> Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>>
>>
>>
>> On 9/27/23 12:07, Duan, Zhenzhong wrote:
>>> Hi Eric,
>>>
>>>> -----Original Message-----
>>>> From: Eric Auger <eric.au...@redhat.com>
>>>> Sent: Wednesday, September 27, 2023 5:02 PM
>>>> Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce
>>>> vfio_[attach/detach]_device
>>>>
>>>> Hi Zhenzhong,
>>>>
>>>> On 9/26/23 13:32, Zhenzhong Duan wrote:
>>>>> From: Eric Auger <eric.au...@redhat.com>
>>>>>
>>>>> We want the VFIO devices to be able to use two different
>>>>> IOMMU backends, the legacy VFIO one and the new iommufd one.
>>>>>
>>>>> Introduce vfio_[attach/detach]_device which aim at hiding the
>>>>> underlying IOMMU backend (IOCTLs, datatypes, ...).
>>>>>
>>>>> Once vfio_attach_device completes, the device is attached
>>>>> to a security context and its fd can be used. Conversely
>>>>> When vfio_detach_device completes, the device has been
>>>>> detached from the security context.
>>>>>
>>>>> At the moment only the implementation based on the legacy
>>>>> container/group exists. Let's use it from the vfio-pci device.
>>>>> Subsequent patches will handle other devices.
>>>>>
>>>>> Signed-off-by: Eric Auger <eric.au...@redhat.com>
>>>>> Signed-off-by: Yi Liu <yi.l....@intel.com>
>>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
>>>>> ---
>>>>> include/hw/vfio/vfio-common.h | 3 ++
>>>>> hw/vfio/common.c | 68 +++++++++++++++++++++++++++++++++++
>>>>> hw/vfio/pci.c | 50 +++-----------------------
>>>>> hw/vfio/trace-events | 2 +-
>>>>> 4 files changed, 77 insertions(+), 46 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>> common.h
>>>>> index c4e7c3b4a7..12fbfbc37d 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>>>>> struct vfio_device_info *vfio_get_device_info(int fd);
>>>>> int vfio_get_device(VFIOGroup *group, const char *name,
>>>>> VFIODevice *vbasedev, Error **errp);
>>>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>> + AddressSpace *as, Error **errp);
>>>>> +void vfio_detach_device(VFIODevice *vbasedev);
>>>>>
>>>>> int vfio_kvm_device_add_fd(int fd, Error **errp);
>>>>> int vfio_kvm_device_del_fd(int fd, Error **errp);
>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>> index 959b1362bb..7f3798b152 100644
>>>>> --- a/hw/vfio/common.c
>>>>> +++ b/hw/vfio/common.c
>>>>> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t
>> op)
>>>>> }
>>>>> return vfio_eeh_container_op(container, op);
>>>>> }
>>>>> +
>>>>> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
>>>>> +{
>>>>> + char *tmp, group_path[PATH_MAX], *group_name;
>>>>> + int ret, groupid;
>>>>> + ssize_t len;
>>>>> +
>>>>> + tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>>>>> + len = readlink(tmp, group_path, sizeof(group_path));
>>>>> + g_free(tmp);
>>>>> +
>>>>> + if (len <= 0 || len >= sizeof(group_path)) {
>>>>> + ret = len < 0 ? -errno : -ENAMETOOLONG;
>>>>> + error_setg_errno(errp, -ret, "no iommu_group found");
>>>>> + return ret;
>>>>> + }
>>>>> +
>>>>> + group_path[len] = 0;
>>>>> +
>>>>> + group_name = basename(group_path);
>>>>> + if (sscanf(group_name, "%d", &groupid) != 1) {
>>>>> + error_setg_errno(errp, errno, "failed to read %s", group_path);
>>>>> + return -errno;
>>>>> + }
>>>>> + return groupid;
>>>>> +}
>>>>> +
>>>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>> + AddressSpace *as, Error **errp)
>>>>> +{
>>>>> + int groupid = vfio_device_groupid(vbasedev, errp);
>>>>> + VFIODevice *vbasedev_iter;
>>>>> + VFIOGroup *group;
>>>>> + int ret;
>>>>> +
>>>>> + if (groupid < 0) {
>>>>> + return groupid;
>>>>> + }
>>>>> +
>>>>> + trace_vfio_attach_device(vbasedev->name, groupid);
>>>>> +
>>>>> + group = vfio_get_group(groupid, as, errp);
>>>>> + if (!group) {
>>>>> + return -ENOENT;
>>>>> + }
>>>>> +
>>>>> + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>>>>> + if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>>>>> + error_setg(errp, "device is already attached");
>>>>> + vfio_put_group(group);
>>>>> + return -EBUSY;
>>>>> + }
>>>>> + }
>>>>> + ret = vfio_get_device(group, name, vbasedev, errp);
>>>>> + if (ret) {
>>>>> + vfio_put_group(group);
>>>>> + }
>>>>> +
>>>>> + return ret;
>>>>> +}
>>>>> +
>>>>> +void vfio_detach_device(VFIODevice *vbasedev)
>>>>> +{
>>>>> + VFIOGroup *group = vbasedev->group;
>>>>> +
>>>>> + vfio_put_base_device(vbasedev);
>>>>> + vfio_put_group(group);
>>>>> +}
>>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>>> index 3b2ca3c24c..fe56789893 100644
>>>>> --- a/hw/vfio/pci.c
>>>>> +++ b/hw/vfio/pci.c
>>>>> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice
>>>> *vdev, Error **errp)
>>>>> static void vfio_put_device(VFIOPCIDevice *vdev)
>>>>> {
>>>>> + vfio_detach_device(&vdev->vbasedev);
>>>>> +
>>>>> g_free(vdev->vbasedev.name);
>>>>> g_free(vdev->msix);
>>>>> -
>>>>> - vfio_put_base_device(&vdev->vbasedev);
>>>>> }
>>>>>
>>>>> static void vfio_err_notifier_handler(void *opaque)
>>>>> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>>>> **errp)
>>>>> {
>>>>> VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>>>>> VFIODevice *vbasedev = &vdev->vbasedev;
>>>>> - VFIODevice *vbasedev_iter;
>>>>> - VFIOGroup *group;
>>>>> - char *tmp, *subsys, group_path[PATH_MAX], *group_name;
>>>>> + char *tmp, *subsys;
>>>>> Error *err = NULL;
>>>>> - ssize_t len;
>>>>> struct stat st;
>>>>> - int groupid;
>>>>> int i, ret;
>>>>> bool is_mdev;
>>>>> char uuid[UUID_FMT_LEN];
>>>>> @@ -3015,39 +3011,6 @@ static void vfio_realize(PCIDevice *pdev, Error
>>>> **errp)
>>>>> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>>>> vbasedev->dev = DEVICE(vdev);
>>>>>
>>>>> - tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>>>>> - len = readlink(tmp, group_path, sizeof(group_path));
>>>>> - g_free(tmp);
>>>>> -
>>>>> - if (len <= 0 || len >= sizeof(group_path)) {
>>>>> - error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
>>>>> - "no iommu_group found");
>>>>> - goto error;
>>>>> - }
>>>>> -
>>>>> - group_path[len] = 0;
>>>>> -
>>>>> - group_name = basename(group_path);
>>>>> - if (sscanf(group_name, "%d", &groupid) != 1) {
>>>>> - error_setg_errno(errp, errno, "failed to read %s", group_path);
>>>>> - goto error;
>>>>> - }
>>>>> -
>>>>> - trace_vfio_realize(vbasedev->name, groupid);
>>>>> -
>>>>> - group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev),
>>>> errp);
>>>>> - if (!group) {
>>>>> - goto error;
>>>>> - }
>>>>> -
>>>>> - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>>>>> - if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>>>>> - error_setg(errp, "device is already attached");
>>>>> - vfio_put_group(group);
>>>>> - goto error;
>>>>> - }
>>>>> - }
>>>>> -
>>>>> /*
>>>>> * Mediated devices *might* operate compatibly with discarding of
>>>>> RAM,
>> but
>>>>> * we cannot know for certain, it depends on whether the mdev vendor
>>>> driver
>>>>> @@ -3065,7 +3028,6 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>>>>> if (vbasedev->ram_block_discard_allowed && !is_mdev) {
>>>>> error_setg(errp, "x-balloon-allowed only potentially compatible "
>>>>> "with mdev devices");
>>>>> - vfio_put_group(group);
>>>>> goto error;
>>>>> }
>>>>>
>>>>> @@ -3076,10 +3038,10 @@ static void vfio_realize(PCIDevice *pdev, Error
>>>> **errp)
>>>>> name = g_strdup(vbasedev->name);
>>>>> }
>>>>>
>>>>> - ret = vfio_get_device(group, name, vbasedev, errp);
>>>>> + ret = vfio_attach_device(name, vbasedev,
>>>>> + pci_device_iommu_address_space(pdev), errp);
>>>>> g_free(name);
>>>>> if (ret) {
>>>>> - vfio_put_group(group);
>>>> independently on this patch, I think in case of error we leak
>>>> vbasedev->name. Please have a look and if confirmed you can send a
>>>> separate patch.
>>> In case of error, vbasedev->name is freed in vfio_put_device().
>> called in vfio_instance_finalize only. See comment below
>>>> Also I think if any subsequent action fail we shoudl properly detach the
>>>> detach the device so introduce an extra error goto label, my bad sorry.
>>> vfio_detach_device is called in vfio_instance_finalize(), this is just to
>>> follow
>>> the old code, group and container resource allocated in vfio_realize() are
>>> freed in vfio_instance_finalize().
>>>
>>> I agree this is strange, but I guess there may be some reason I'm unclear.
>> Yeah but if vfio_realize does fail, I am not sure the
>> vfio_instance_finalize gets called
> If vfio_realize fails, fio_exitfn() will not be called but
> vfio_instance_finalize will.
> Note vfio_instance_finalize() is called by object_unref().
ah OK that's what I forgot then :-(
Eric
>
> Thanks
> Zhenzhong
>