On 6/16/26 11:14, Geoffrey McRae wrote:
> From: geomcrae_amdeng <[email protected]>
> 
> Fix a sysfs duplication error when reinitializing the device:
> 
>   sysfs: cannot create duplicate filename '.../ip_discovery'
>   kobject_add_internal failed for ip_discovery with -EEXIST
>   ...
>   Failed to create device file mem_info_preempt_used (-17)
> 
> The failure is caused by stale sysfs entries not being removed during
> device teardown, leading to -EEXIST when the driver is reprobed. In
> particular:
> 
> - amdgpu_discovery sysfs kobjects were not fully torn down early enough,
>   and ip_top remained non-NULL after cleanup
> - the preempt manager sysfs attribute was removed only conditionally
>   and not during the common hw fini path
> 
> Fix this by:
> - making amdgpu_discovery_sysfs_fini() externally visible and clearing
>   adev->discovery.ip_top to prevent reuse
> - calling amdgpu_discovery_sysfs_fini() and
>   amdgpu_preempt_mgr_sysfs_fini() from
>   amdgpu_device_sys_interface_fini()
> 
> This ensures sysfs state is fully cleaned up before reprobe and avoids
> duplicate kobject/file creation.
> 
> Cc: Christian König <[email protected]>
> Cc: Alex Deucher <[email protected]>
> Signed-off-by: geomcrae_amdeng <[email protected]>

Reviewed-by: Christian König <[email protected]>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c      |  5 +++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c   |  5 ++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h   |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c | 14 +++++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h         |  1 +
>  5 files changed, 20 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 91f2506b9529..e72924976994 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3678,6 +3678,10 @@ static void amdgpu_device_sys_interface_fini(struct 
> amdgpu_device *adev)
>               amdgpu_pm_sysfs_fini(adev);
>       if (adev->ucode_sysfs_en)
>               amdgpu_ucode_sysfs_fini(adev);
> +
> +     amdgpu_discovery_sysfs_fini(adev);
> +     amdgpu_preempt_mgr_sysfs_fini(adev);
> +
>       amdgpu_device_attr_sysfs_fini(adev);
>       amdgpu_fru_sysfs_fini(adev);
>  
> @@ -4211,6 +4215,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
>  
>       if (adev->mman.initialized)
>               drain_workqueue(adev->mman.bdev.wq);
> +
>       adev->shutdown = true;
>  
>       unregister_pm_notifier(&adev->pm_nb);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> index 0c51e0fead40..a229fe9d043b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> @@ -759,8 +759,6 @@ static int amdgpu_discovery_init(struct amdgpu_device 
> *adev)
>       return r;
>  }
>  
> -static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev);
> -
>  void amdgpu_discovery_fini(struct amdgpu_device *adev)
>  {
>       if (adev->discovery.ip_top && !adev->discovery.ip_top->standalone_mode)
> @@ -1483,7 +1481,7 @@ static void amdgpu_discovery_sysfs_die_free(struct 
> ip_die_entry *ip_die_entry)
>       kobject_put(&ip_die_entry->ip_kset.kobj);
>  }
>  
> -static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
> +void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
>  {
>       struct ip_discovery_top *ip_top = adev->discovery.ip_top;
>       struct list_head *el, *tmp;
> @@ -1492,6 +1490,7 @@ static void amdgpu_discovery_sysfs_fini(struct 
> amdgpu_device *adev)
>       if (!ip_top)
>               return;
>  
> +     adev->discovery.ip_top = NULL;
>       die_kset = &ip_top->die_kset;
>       spin_lock(&die_kset->list_lock);
>       list_for_each_prev_safe(el, tmp, &die_kset->list) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> index edc78184e0f3..5b2b16f68576 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> @@ -41,6 +41,7 @@ struct amdgpu_discovery_info {
>       bool reserve_tmr;
>  };
>  
> +void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev);
>  void amdgpu_discovery_fini(struct amdgpu_device *adev);
>  int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
> index 34b5e22b44e5..37ef5b9eb1cf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
> @@ -46,6 +46,17 @@ static ssize_t mem_info_preempt_used_show(struct device 
> *dev,
>  
>  static DEVICE_ATTR_RO(mem_info_preempt_used);
>  
> +/**
> + * amdgpu_preempt_mgr_sysfs_fini - remove PREEMPT manager sysfs attributes
> + *
> + * @adev: amdgpu_device pointer
> + */
> +void amdgpu_preempt_mgr_sysfs_fini(struct amdgpu_device *adev)
> +{
> +     if (adev->dev->kobj.sd)
> +             device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
> +}
> +
>  /**
>   * amdgpu_preempt_mgr_new - allocate a new node
>   *
> @@ -137,9 +148,6 @@ void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
>       if (ret)
>               return;
>  
> -     if (adev->dev->kobj.sd)
> -             device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
> -
>       ttm_resource_manager_cleanup(man);
>       ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 2d72fa217274..00acec7226f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -140,6 +140,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, 
> uint64_t gtt_size);
>  void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
>  int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
>  void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
> +void amdgpu_preempt_mgr_sysfs_fini(struct amdgpu_device *adev);
>  int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
>  void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
>  

Reply via email to