Re: [Intel-gfx] [PATCH v2 04/14] vfio: Provide better generic support for open/release vfio_device_ops

2021-07-23 Thread Jason Gunthorpe
On Fri, Jul 23, 2021 at 09:39:14AM +0200, Christoph Hellwig wrote:

> This looks unnecessarily complicated.  We can just try to load first
> and then store it under the same lock, e.g.:

Yes indeed, I went with this:

int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	new_dev_set->set_id = set_id;

	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	device->dev_set = dev_set;
	return 0;
}

I'm also half inclined to delete the xa_load() since I think the
common case here is to need the allocate...

>	xa_lock(&vfio_device_set_xa);
>	set = xa_load(&vfio_device_set_xa, idx);
>	if (set) {
>		kfree(new);
>		goto found;
>	}
>	err = xa_err(__xa_store(&vfio_device_set_xa, idx, new, GFP_KERNEL));

AIUI this is subtly racy:


  CPU1   CPU2
xa_lock()
 xa_load() == NULL
 xa_store()
   __xas_nomem()
 xa_unlock()
 xa_lock()
  xa_load() == NULL
  xa_store()
   __xas_nomem()
xa_unlock()
 kmem_cache_alloc()
  kmem_cache_alloc()
 xa_lock()
   [idx] = new1
xa_unlock()
xa_lock()
   [idx] = new2// Woops, lost new1!
 xa_unlock()

The hidden xa unlock is really tricky.

The __xa_cmpxchg is safe against this.

Jason
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 04/14] vfio: Provide better generic support for open/release vfio_device_ops

2021-07-23 Thread Christoph Hellwig
> +int vfio_assign_device_set(struct vfio_device *device, void *set_id)
> +{
> +	struct vfio_device_set *alloc_dev_set = NULL;
> +	struct vfio_device_set *dev_set;
> +
> +	if (WARN_ON(!set_id))
> +		return -EINVAL;
> +
> +	/*
> +	 * Atomically acquire a singleton object in the xarray for this set_id
> +	 */
> +again:
> +	xa_lock(&vfio_device_set_xa);
> +	if (alloc_dev_set) {
> +		dev_set = __xa_cmpxchg(&vfio_device_set_xa,
> +				       (unsigned long)set_id, NULL,
> +				       alloc_dev_set, GFP_KERNEL);
> +		if (xa_is_err(dev_set)) {
> +			xa_unlock(&vfio_device_set_xa);
> +			kfree(alloc_dev_set);
> +			return xa_err(dev_set);
> +		}
> +		if (!dev_set)
> +			dev_set = alloc_dev_set;
> +	} else {
> +		dev_set = xa_load(&vfio_device_set_xa, (unsigned long)set_id);
> +	}
> +
> +	if (dev_set) {
> +		dev_set->device_count++;
> +		xa_unlock(&vfio_device_set_xa);
> +		device->dev_set = dev_set;
> +		if (dev_set != alloc_dev_set)
> +			kfree(alloc_dev_set);
> +		return 0;
> +	}
> +	xa_unlock(&vfio_device_set_xa);
> +
> +	if (WARN_ON(alloc_dev_set))
> +		return -EINVAL;
> +
> +	alloc_dev_set = kzalloc(sizeof(*alloc_dev_set), GFP_KERNEL);
> +	if (!alloc_dev_set)
> +		return -ENOMEM;
> +	mutex_init(&alloc_dev_set->lock);
> +	alloc_dev_set->set_id = set_id;
> +	goto again;
> +}
> +EXPORT_SYMBOL_GPL(vfio_assign_device_set);

This looks unnecessarily complicated.  We can just try to load first
and then store it under the same lock, e.g.:

int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *set, *new;
	int err;

	if (WARN_ON(!set_id))
		return -EINVAL;

	xa_lock(&vfio_device_set_xa);
	set = xa_load(&vfio_device_set_xa, idx);
	if (set)
		goto found;
	xa_unlock(&vfio_device_set_xa);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return -ENOMEM;
	mutex_init(&new->lock);
	new->set_id = set_id;

	xa_lock(&vfio_device_set_xa);
	set = xa_load(&vfio_device_set_xa, idx);
	if (set) {
		kfree(new);
		goto found;
	}
	err = xa_err(__xa_store(&vfio_device_set_xa, idx, new, GFP_KERNEL));
	xa_unlock(&vfio_device_set_xa);
	if (err)
		kfree(new);
	return err;

found:
	set->device_count++;
	xa_unlock(&vfio_device_set_xa);

	device->dev_set = set;
	return 0;
}

> +static void vfio_release_device_set(struct vfio_device *device)
> +{
> + struct vfio_device_set *dev_set = device->dev_set;
> +
> + if (!dev_set)
> + return;
> +
> + xa_lock(_device_set_xa);
> + dev_set->device_count--;
> + if (!dev_set->device_count) {

Nit, but I'd find

if (!--dev_set->device_count) {

easier to follow as it clearly documents the dec_and_test pattern.
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 04/14] vfio: Provide better generic support for open/release vfio_device_ops

2021-07-22 Thread Cornelia Huck
On Tue, Jul 20 2021, Jason Gunthorpe  wrote:

> Currently the driver ops have an open/release pair that is called once
> each time a device FD is opened or closed. Add an additional set of
> open/close_device() ops which are called when the device FD is opened for
> the first time and closed for the last time.
>
> An analysis shows that all of the drivers require this semantic. Some are
> open coding it as part of their reflck implementation, and some are just
> buggy and miss it completely.
>
> To retain the current semantics PCI and FSL depend on, introduce the idea
> of a "device set" which is a grouping of vfio_device's that share the same
> lock around opening.
>
> The device set is established by providing a 'set_id' pointer. All
> vfio_device's that provide the same pointer will be joined to the same
> singleton memory and lock across the whole set. This effectively replaces
> the oddly named reflck.
>
> After conversion the set_id will be sourced from:
>  - A struct device from a fsl_mc_device (fsl)
>  - A struct pci_slot (pci)
>  - A struct pci_bus (pci)
>  - The struct vfio_device (everything)
>
> The design ensures that the above pointers are live as long as the
> vfio_device is registered, so they form reliable unique keys to group
> vfio_devices into sets.
>
> This implementation uses xarray instead of searching through the driver
> core structures, which simplifies the somewhat tricky locking in this
> area.
>
> Following patches convert all the drivers.
>
> Signed-off-by: Yishai Hadas 
> Signed-off-by: Jason Gunthorpe 
> ---
>  drivers/vfio/mdev/vfio_mdev.c |  22 +
>  drivers/vfio/vfio.c   | 146 +-
>  include/linux/mdev.h  |   2 +
>  include/linux/vfio.h  |  19 +
>  4 files changed, 167 insertions(+), 22 deletions(-)

Reviewed-by: Cornelia Huck 

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 04/14] vfio: Provide better generic support for open/release vfio_device_ops

2021-07-20 Thread Jason Gunthorpe
Currently the driver ops have an open/release pair that is called once
each time a device FD is opened or closed. Add an additional set of
open/close_device() ops which are called when the device FD is opened for
the first time and closed for the last time.

An analysis shows that all of the drivers require this semantic. Some are
open coding it as part of their reflck implementation, and some are just
buggy and miss it completely.

To retain the current semantics PCI and FSL depend on, introduce the idea
of a "device set" which is a grouping of vfio_device's that share the same
lock around opening.

The device set is established by providing a 'set_id' pointer. All
vfio_device's that provide the same pointer will be joined to the same
singleton memory and lock across the whole set. This effectively replaces
the oddly named reflck.

After conversion the set_id will be sourced from:
 - A struct device from a fsl_mc_device (fsl)
 - A struct pci_slot (pci)
 - A struct pci_bus (pci)
 - The struct vfio_device (everything)

The design ensures that the above pointers are live as long as the
vfio_device is registered, so they form reliable unique keys to group
vfio_devices into sets.

This implementation uses xarray instead of searching through the driver
core structures, which simplifies the somewhat tricky locking in this
area.

Following patches convert all the drivers.

Signed-off-by: Yishai Hadas 
Signed-off-by: Jason Gunthorpe 
---
 drivers/vfio/mdev/vfio_mdev.c |  22 +
 drivers/vfio/vfio.c   | 146 +-
 include/linux/mdev.h  |   2 +
 include/linux/vfio.h  |  19 +
 4 files changed, 167 insertions(+), 22 deletions(-)

diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c
index a5c77ccb24f70a..725cd2fe675190 100644
--- a/drivers/vfio/mdev/vfio_mdev.c
+++ b/drivers/vfio/mdev/vfio_mdev.c
@@ -17,6 +17,26 @@
 
 #include "mdev_private.h"
 
+static int vfio_mdev_open_device(struct vfio_device *core_vdev)
+{
+   struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
+   struct mdev_parent *parent = mdev->type->parent;
+
+   if (unlikely(!parent->ops->open_device))
+   return 0;
+
+   return parent->ops->open_device(mdev);
+}
+
+static void vfio_mdev_close_device(struct vfio_device *core_vdev)
+{
+   struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
+   struct mdev_parent *parent = mdev->type->parent;
+
+   if (likely(parent->ops->close_device))
+   parent->ops->close_device(mdev);
+}
+
 static int vfio_mdev_open(struct vfio_device *core_vdev)
 {
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
@@ -100,6 +120,8 @@ static void vfio_mdev_request(struct vfio_device 
*core_vdev, unsigned int count)
 
 static const struct vfio_device_ops vfio_mdev_dev_ops = {
.name   = "vfio-mdev",
+   .open_device= vfio_mdev_open_device,
+   .close_device   = vfio_mdev_close_device,
.open   = vfio_mdev_open,
.release= vfio_mdev_release,
.ioctl  = vfio_mdev_unlocked_ioctl,
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index cc375df0fd5dda..8572d943320214 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -96,6 +96,76 @@ module_param_named(enable_unsafe_noiommu_mode,
 MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  
This mode provides no device isolation, no DMA translation, no host kernel 
protection, cannot be used for device assignment to virtual machines, requires 
RAWIO permissions, and will taint the kernel.  If you do not know what this is 
for, step away. (default: false)");
 #endif
 
+static DEFINE_XARRAY(vfio_device_set_xa);
+
+int vfio_assign_device_set(struct vfio_device *device, void *set_id)
+{
+   struct vfio_device_set *alloc_dev_set = NULL;
+   struct vfio_device_set *dev_set;
+
+   if (WARN_ON(!set_id))
+   return -EINVAL;
+
+   /*
+* Atomically acquire a singleton object in the xarray for this set_id
+*/
+again:
+	xa_lock(&vfio_device_set_xa);
+	if (alloc_dev_set) {
+		dev_set = __xa_cmpxchg(&vfio_device_set_xa,
+				       (unsigned long)set_id, NULL,
+				       alloc_dev_set, GFP_KERNEL);
+		if (xa_is_err(dev_set)) {
+			xa_unlock(&vfio_device_set_xa);
+			kfree(alloc_dev_set);
+			return xa_err(dev_set);
+		}
+		if (!dev_set)
+			dev_set = alloc_dev_set;
+	} else {
+		dev_set = xa_load(&vfio_device_set_xa, (unsigned long)set_id);
+	}
+
+	if (dev_set) {
+		dev_set->device_count++;
+		xa_unlock(&vfio_device_set_xa);
+		device->dev_set = dev_set;
+		if (dev_set != alloc_dev_set)
+			kfree(alloc_dev_set);