On 12/11/25 16:08, Philipp Stanner wrote:
> On Thu, 2025-12-11 at 13:16 +0100, Christian König wrote:
>> This allows amdkfd_fences to outlive the amdgpu module.
>>
>> v2: implement Felix suggestion to lock the fence while signaling it.
>> v3: fix typos
>> v4: fix return code in signal_eviction_fence
>>
>> Signed-off-by: Christian König <[email protected]>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  7 +++
>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c  | 44 +++++++++----------
>>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  2 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  4 +-
>>  4 files changed, 31 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 8bdfcde2029b..2f2b277cfaed 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -196,6 +196,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void 
>> *data);
>>  #endif
>>  #if IS_ENABLED(CONFIG_HSA_AMD)
>>  bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
>> +bool amdkfd_fence_signal(struct dma_fence *f);
>>  struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
>>  void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
>>  int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
>> @@ -210,6 +211,12 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct 
>> mm_struct *mm)
>>      return false;
>>  }
>>  
>> +static inline
>> +bool amdkfd_fence_signal(struct dma_fence *f)
>> +{
>> +    return false;
>> +}
> 
> Huh? What's that?
> 
> That function seems to be just a NOP. Its return code is used nowhere,
> is it?

It's the dummy which is used when CONFIG_HSA_AMD isn't enabled.

Not sure if it's actually used or not, but we have dummies for all functions 
declared in this file.

> 
>> +
>>  static inline
>>  struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
>>  {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>> index 09c919f72b6c..9cd413e325f0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>> @@ -127,29 +127,9 @@ static bool amdkfd_fence_enable_signaling(struct 
>> dma_fence *f)
>>              if (!svm_range_schedule_evict_svm_bo(fence))
>>                      return true;
>>      }
>> -    return false;
>> -}
>> -
>> -/**
>> - * amdkfd_fence_release - callback that fence can be freed
>> - *
>> - * @f: dma_fence
>> - *
>> - * This function is called when the reference count becomes zero.
>> - * Drops the mm_struct reference and RCU schedules freeing up the fence.
>> - */
>> -static void amdkfd_fence_release(struct dma_fence *f)
>> -{
>> -    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>> -
>> -    /* Unconditionally signal the fence. The process is getting
>> -     * terminated.
>> -     */
>> -    if (WARN_ON(!fence))
>> -            return; /* Not an amdgpu_amdkfd_fence */
>> -
>>      mmdrop(fence->mm);
>> -    kfree_rcu(f, rcu);
>> +    fence->mm = NULL;
>> +    return false;
>>  }
>>  
>>  /**
>> @@ -174,9 +154,27 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct 
>> mm_struct *mm)
>>      return false;
>>  }
>>  
>> +bool amdkfd_fence_signal(struct dma_fence *f)
>> +{
>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>> +    unsigned long flags;
>> +    bool was_signaled;
>> +
>> +    dma_fence_lock_irqsave(f, flags);
>> +    if (fence->mm) {
>> +            mmdrop(fence->mm);
>> +            fence->mm = NULL;
>> +    }
>> +    was_signaled = dma_fence_is_signaled_locked(f);
>> +    if (!was_signaled)
>> +            dma_fence_signal_locked(f);
>> +    dma_fence_unlock_irqrestore(f, flags);
>> +
>> +    return was_signaled;
>> +}
>> +
>>  static const struct dma_fence_ops amdkfd_fence_ops = {
>>      .get_driver_name = amdkfd_fence_get_driver_name,
>>      .get_timeline_name = amdkfd_fence_get_timeline_name,
>>      .enable_signaling = amdkfd_fence_enable_signaling,
>> -    .release = amdkfd_fence_release,
>>  };
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index bb252ec43733..2cf39e3d3fae 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -1173,7 +1173,7 @@ static void kfd_process_wq_release(struct work_struct 
>> *work)
>>      synchronize_rcu();
>>      ef = rcu_access_pointer(p->ef);
>>      if (ef)
>> -            dma_fence_signal(ef);
>> +            amdkfd_fence_signal(ef);
>>  
>>      kfd_process_remove_sysfs(p);
>>      kfd_debugfs_remove_process(p);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 97c2270f278f..0e94f3a976b1 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -428,7 +428,7 @@ static void svm_range_bo_release(struct kref *kref)
>>  
>>      if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
>>              /* We're not in the eviction worker. Signal the fence. */
>> -            dma_fence_signal(&svm_bo->eviction_fence->base);
>> +            amdkfd_fence_signal(&svm_bo->eviction_fence->base);
>>      dma_fence_put(&svm_bo->eviction_fence->base);
>>      amdgpu_bo_unref(&svm_bo->bo);
>>      kfree(svm_bo);
>> @@ -3628,7 +3628,7 @@ static void svm_range_evict_svm_bo_worker(struct 
>> work_struct *work)
>>      mmap_read_unlock(mm);
>>      mmput(mm);
>>  
>> -    dma_fence_signal(&svm_bo->eviction_fence->base);
>> +    amdkfd_fence_signal(&svm_bo->eviction_fence->base);
> 
> 
> And why do you do those changes and why doesn't the commit message
> explain it?
> 
> You stop signalling those fences, after all.

Huh? I don't stop signaling the fences. I just delegate signaling to a 
separate helper function which does some extra cleanup before signaling the 
fence.

Regards,
Christian.

> 
> 
> P.
> 
>>  
>>      /* This is the last reference to svm_bo, after svm_range_vram_node_free
>>       * has been called in svm_migrate_vram_to_ram
> 

Reply via email to