On 12/11/25 17:12, Tvrtko Ursulin wrote:
> 
> On 11/12/2025 13:16, Christian König wrote:
>> This allows amdgpu_userq_fences to outlive the amdgpu module.
>>
>> Signed-off-by: Christian König <[email protected]>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       | 13 +----
>>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 54 ++++---------------
>>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  8 ---
>>   3 files changed, 11 insertions(+), 64 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 2dfbddcef9ab..f206297aae8b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -3155,11 +3155,7 @@ static int __init amdgpu_init(void)
>>         r = amdgpu_sync_init();
>>       if (r)
>> -        goto error_sync;
>> -
>> -    r = amdgpu_userq_fence_slab_init();
>> -    if (r)
>> -        goto error_fence;
>> +        return r;
>>         DRM_INFO("amdgpu kernel modesetting enabled.\n");
>>       amdgpu_register_atpx_handler();
>> @@ -3176,12 +3172,6 @@ static int __init amdgpu_init(void)
>>         /* let modprobe override vga console setting */
>>       return pci_register_driver(&amdgpu_kms_pci_driver);
>> -
>> -error_fence:
>> -    amdgpu_sync_fini();
>> -
>> -error_sync:
>> -    return r;
>>   }
>>     static void __exit amdgpu_exit(void)
>> @@ -3191,7 +3181,6 @@ static void __exit amdgpu_exit(void)
>>       amdgpu_unregister_atpx_handler();
>>       amdgpu_acpi_release();
>>       amdgpu_sync_fini();
>> -    amdgpu_userq_fence_slab_fini();
>>       mmu_notifier_synchronize();
>>       amdgpu_xcp_drv_release();
>>   }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> index eba9fb359047..bb19f72770b0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> @@ -33,26 +33,6 @@
>>   #include "amdgpu_userq_fence.h"
>>     static const struct dma_fence_ops amdgpu_userq_fence_ops;
>> -static struct kmem_cache *amdgpu_userq_fence_slab;
>> -
>> -int amdgpu_userq_fence_slab_init(void)
>> -{
>> -    amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
>> -                            sizeof(struct amdgpu_userq_fence),
>> -                            0,
>> -                            SLAB_HWCACHE_ALIGN,
>> -                            NULL);
>> -    if (!amdgpu_userq_fence_slab)
>> -        return -ENOMEM;
>> -
>> -    return 0;
>> -}
>> -
>> -void amdgpu_userq_fence_slab_fini(void)
>> -{
>> -    rcu_barrier();
> 
> What was this rcu_barrier() for? Cargo culted or more to it?

All dma_fences are RCU protected. When they are backed by a kmem_cache, you need 
to make sure an RCU grace period has passed before destroying the 
kmem_cache — otherwise a concurrent RCU reader could still be touching a fence 
whose backing storage is being torn down.

Since the dma_fence framework now uses kfree_rcu for fence release, that shouldn't 
be problematic any more.


>> -    kmem_cache_destroy(amdgpu_userq_fence_slab);
>> -}
>>     static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct 
>> dma_fence *f)
>>   {
>> @@ -227,7 +207,7 @@ void amdgpu_userq_fence_driver_put(struct 
>> amdgpu_userq_fence_driver *fence_drv)
>>     static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence 
>> **userq_fence)
>>   {
>> -    *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
>> +    *userq_fence = kmalloc(sizeof(**userq_fence), GFP_ATOMIC);
> This GFP_ATOMIC is suboptimal for sure being on the ioctl path. It is outside 
> of the scope for this patch, but once my userq cleanup patches get reviewed 
> next on my list was to try and understand this.
>>       return *userq_fence ? 0 : -ENOMEM;
>>   }
>>   @@ -243,12 +223,11 @@ static int amdgpu_userq_fence_create(struct 
>> amdgpu_usermode_queue *userq,
>>       if (!fence_drv)
>>           return -EINVAL;
>>   -    spin_lock_init(&userq_fence->lock);
>>       INIT_LIST_HEAD(&userq_fence->link);
>>       fence = &userq_fence->base;
>>       userq_fence->fence_drv = fence_drv;
>>   -    dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
>> +    dma_fence_init64(fence, &amdgpu_userq_fence_ops, NULL,
>>                fence_drv->context, seq);
>>         amdgpu_userq_fence_driver_get(fence_drv);
>> @@ -318,35 +297,22 @@ static bool amdgpu_userq_fence_signaled(struct 
>> dma_fence *f)
>>       rptr = amdgpu_userq_fence_read(fence_drv);
>>       wptr = fence->base.seqno;
>>   -    if (rptr >= wptr)
>> +    if (rptr >= wptr) {
>> +        amdgpu_userq_fence_driver_put(fence->fence_drv);
> 
> fence_drv is in a local already.
> 
>> +        fence->fence_drv = NULL;
> 
> amdgpu_userq_fence_get_timeline_name could now oops somehow?
> 
>> +
>> +        kvfree(fence->fence_drv_array);
>> +        fence->fence_drv_array = NULL;
> 
> Not sure if this is safe either. amdgpu_userq_fence_driver_process() drops 
> its reference before it unlinks the fence from the list. Can someone external 
> trigger the fence_is_signaled check, before the interrupt processing kicks 
> in, which will clear fence_drv_array, and so 
> amdgpu_userq_fence_driver_process() would oops?

Oh, good question. I need to double check that.

Thanks,
Christian.

> 
> Regards,
> 
> Tvrtko
> 
>>           return true;
>> +    }
>>         return false;
>>   }
>>   -static void amdgpu_userq_fence_free(struct rcu_head *rcu)
>> -{
>> -    struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
>> -    struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
>> -    struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;
>> -
>> -    /* Release the fence driver reference */
>> -    amdgpu_userq_fence_driver_put(fence_drv);
>> -
>> -    kvfree(userq_fence->fence_drv_array);
>> -    kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>> -}
>> -
>> -static void amdgpu_userq_fence_release(struct dma_fence *f)
>> -{
>> -    call_rcu(&f->rcu, amdgpu_userq_fence_free);
>> -}
>> -
>>   static const struct dma_fence_ops amdgpu_userq_fence_ops = {
>>       .get_driver_name = amdgpu_userq_fence_get_driver_name,
>>       .get_timeline_name = amdgpu_userq_fence_get_timeline_name,
>>       .signaled = amdgpu_userq_fence_signaled,
>> -    .release = amdgpu_userq_fence_release,
>>   };
>>     /**
>> @@ -560,7 +526,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, 
>> void *data,
>>       r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
>>       if (r) {
>>           mutex_unlock(&userq_mgr->userq_mutex);
>> -        kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>> +        kfree(userq_fence);
>>           goto put_gobj_write;
>>       }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> index d76add2afc77..6f04782f3ea9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> @@ -31,11 +31,6 @@
>>     struct amdgpu_userq_fence {
>>       struct dma_fence base;
>> -    /*
>> -     * This lock is necessary to synchronize the
>> -     * userqueue dma fence operations.
>> -     */
>> -    spinlock_t lock;
>>       struct list_head link;
>>       unsigned long fence_drv_array_count;
>>       struct amdgpu_userq_fence_driver *fence_drv;
>> @@ -58,9 +53,6 @@ struct amdgpu_userq_fence_driver {
>>       char timeline_name[TASK_COMM_LEN];
>>   };
>>   -int amdgpu_userq_fence_slab_init(void);
>> -void amdgpu_userq_fence_slab_fini(void);
>> -
>>   void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver 
>> *fence_drv);
>>   void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver 
>> *fence_drv);
>>   int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
> 

Reply via email to