On 12/23/2016 1:51 AM, Alex Williamson wrote:
> Using the mtty mdev sample driver we can generate a remove race by
> starting one shell that continuously creates mtty devices and several
> other shells all attempting to remove devices, in my case four remove
> shells.  The fault occurs in mdev_remove_sysfs_files() where the
> passed type arg is NULL, which suggests we've received a struct device
> in mdev_device_remove() but it's in some sort of teardown state.  The
> solution here is to make use of the accidentally unused list_head on
> the mdev_device such that the mdev core keeps a list of all the mdev
> devices.  This allows us to validate that we have a valid mdev before
> we start removal, remove it from the list to prevent others from
> working on it, and if the vendor driver refuses to remove, we can
> re-add it to the list.
> 

Alex,

Writing 1 on 'remove' first removes itself, i.e. calls
device_remove_file_self(dev, attr). So if the file is removed then
device_remove_file_self() should return false, isn't that returns false?
kernfs_remove_self() hold the mutex that should handle this condition.

Thanks,
Kirti.


> Cc: Kirti Wankhede <kwankh...@nvidia.com>
> Signed-off-by: Alex Williamson <alex.william...@redhat.com>
> ---
>  drivers/vfio/mdev/mdev_core.c |   36 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 34 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> index be1ee89..6bb4d4c 100644
> --- a/drivers/vfio/mdev/mdev_core.c
> +++ b/drivers/vfio/mdev/mdev_core.c
> @@ -27,6 +27,9 @@
>  static DEFINE_MUTEX(parent_list_lock);
>  static struct class_compat *mdev_bus_compat_class;
>  
> +static LIST_HEAD(mdev_list);
> +static DEFINE_MUTEX(mdev_list_lock);
> +
>  static int _find_mdev_device(struct device *dev, void *data)
>  {
>       struct mdev_device *mdev;
> @@ -316,6 +319,11 @@ int mdev_device_create(struct kobject *kobj, struct 
> device *dev, uuid_le uuid)
>       dev_dbg(&mdev->dev, "MDEV: created\n");
>  
>       mutex_unlock(&parent->lock);
> +
> +     mutex_lock(&mdev_list_lock);
> +     list_add(&mdev->next, &mdev_list);
> +     mutex_unlock(&mdev_list_lock);
> +
>       return ret;
>  
>  create_failed:
> @@ -329,12 +337,30 @@ int mdev_device_create(struct kobject *kobj, struct 
> device *dev, uuid_le uuid)
>  
>  int mdev_device_remove(struct device *dev, bool force_remove)
>  {
> -     struct mdev_device *mdev;
> +     struct mdev_device *mdev, *tmp;
>       struct parent_device *parent;
>       struct mdev_type *type;
>       int ret;
> +     bool found = false;
>  
>       mdev = to_mdev_device(dev);
> +
> +     mutex_lock(&mdev_list_lock);
> +     list_for_each_entry(tmp, &mdev_list, next) {
> +             if (tmp == mdev) {
> +                     found = true;
> +                     break;
> +             }
> +     }
> +
> +     if (found)
> +             list_del(&mdev->next);
> +
> +     mutex_unlock(&mdev_list_lock);
> +
> +     if (!found)
> +             return -ENODEV;
> +
>       type = to_mdev_type(mdev->type_kobj);
>       parent = mdev->parent;
>       mutex_lock(&parent->lock);
> @@ -342,6 +368,11 @@ int mdev_device_remove(struct device *dev, bool 
> force_remove)
>       ret = mdev_device_remove_ops(mdev, force_remove);
>       if (ret) {
>               mutex_unlock(&parent->lock);
> +
> +             mutex_lock(&mdev_list_lock);
> +             list_add(&mdev->next, &mdev_list);
> +             mutex_unlock(&mdev_list_lock);
> +
>               return ret;
>       }
>  
> @@ -349,7 +380,8 @@ int mdev_device_remove(struct device *dev, bool 
> force_remove)
>       device_unregister(dev);
>       mutex_unlock(&parent->lock);
>       mdev_put_parent(parent);
> -     return ret;
> +
> +     return 0;
>  }
>  
>  static int __init mdev_init(void)
> 

Reply via email to