ttm: rework pipelined eviction fence handling

Christian König Wed, 05 Nov 2025 05:00:52 -0800

On 11/5/25 11:34, Pierre-Eric Pelloux-Prayer wrote:
>>
>>> +        } else {
>>> +            all_signaled = false;
>>> +            if (no_wait_gpu) {
>>> +                spin_unlock(&man->pipelined_eviction.lock);
>>> +                return -EBUSY;
>>> +            }
>>> +            fences_to_add[n++] = dma_fence_get(fence);
>>> +        }
>>> +    }
>>> +    spin_unlock(&man->pipelined_eviction.lock);
>>> +
>>> +    if (all_signaled)
>>>           return 0;
>>>   -    if (no_wait_gpu) {
>>> -        ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
>>> -        dma_fence_put(fence);
>>> -        return ret;
>>> +    for (i = 0; i < n; i++) {
>>> +        dma_resv_add_fence(bo->base.resv, fences_to_add[i], 
>>> DMA_RESV_USAGE_KERNEL);
>>> +        dma_fence_put(fences_to_add[i]);
>>>       }
>>>   -    dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
>>> -
>>> -    ret = dma_resv_reserve_fences(bo->base.resv, 1);
>>> -    dma_fence_put(fence);
>>> -    return ret;
>>> +    return dma_resv_reserve_fences(bo->base.resv, 
>>> TTM_FENCES_MAX_SLOT_COUNT);
>>
>> Please separate out a patch where the call to dma_resv_reserve_fences() is 
>> removed here.
> 
> Can you remind me why it's not needed?


Some years ago the dma_resv object had a fixed field for an exclusive fence.

When we removed that we sprinkled calls like 
"dma_resv_reserve_fences(bo->base.resv, 1)" all over the place where we 
previously used this exclusive fence slot to prevent things from going boom!

It could be that some old drivers like radeon or qxl still rely on that 
somewhere, but that would then clearly be a driver bug.

What we could do is to either leave it alone or remove it, but changing it to 
reserving TTM_FENCES_MAX_SLOT_COUNT is clearly not correct.

>>
>>>   }
>>>     /**
>>> @@ -718,7 +732,7 @@ static int ttm_bo_alloc_resource(struct 
>>> ttm_buffer_object *bo,
>>>       int i, ret;
>>>         ticket = dma_resv_locking_ctx(bo->base.resv);
>>> -    ret = dma_resv_reserve_fences(bo->base.resv, 1);
>>> +    ret = dma_resv_reserve_fences(bo->base.resv, 
>>> TTM_FENCES_MAX_SLOT_COUNT);
>>>       if (unlikely(ret))
>>>           return ret;
>>>   @@ -757,7 +771,7 @@ static int ttm_bo_alloc_resource(struct 
>>> ttm_buffer_object *bo,
>>>                   return ret;
>>>           }
>>>   -        ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);
>>> +        ret = ttm_bo_add_pipelined_eviction_fences(bo, man, 
>>> ctx->no_wait_gpu);
>>>           if (unlikely(ret)) {
>>>               ttm_resource_free(bo, res);
>>>               if (ret == -EBUSY)
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
>>> b/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> index acbbca9d5c92..ada8af965acf 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> @@ -258,7 +258,7 @@ static int ttm_buffer_object_transfer(struct 
>>> ttm_buffer_object *bo,
>>>       ret = dma_resv_trylock(&fbo->base.base._resv);
>>>       WARN_ON(!ret);
>>>   -    ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
>>> +    ret = dma_resv_reserve_fences(&fbo->base.base._resv, 
>>> TTM_FENCES_MAX_SLOT_COUNT);
>>>       if (ret) {
>>>           dma_resv_unlock(&fbo->base.base._resv);
>>>           kfree(fbo);
>>> @@ -646,6 +646,8 @@ static void ttm_bo_move_pipeline_evict(struct 
>>> ttm_buffer_object *bo,
>>>   {
>>>       struct ttm_device *bdev = bo->bdev;
>>>       struct ttm_resource_manager *from;
>>> +    struct dma_fence *tmp;
>>> +    int i, free_slot = -1;
>>>         from = ttm_manager_type(bdev, bo->resource->mem_type);
>>>   @@ -653,13 +655,35 @@ static void ttm_bo_move_pipeline_evict(struct 
>>> ttm_buffer_object *bo,
>>>        * BO doesn't have a TTM we need to bind/unbind. Just remember
>>>        * this eviction and free up the allocation
>>>        */
>>> -    spin_lock(&from->move_lock);
>>> -    if (!from->move || dma_fence_is_later(fence, from->move)) {
>>> -        dma_fence_put(from->move);
>>> -        from->move = dma_fence_get(fence);
>>> +    spin_lock(&from->pipelined_eviction.lock);
>>> +    for (i = 0; i < from->pipelined_eviction.n_fences; i++) {
>>> +        tmp = from->pipelined_eviction.fences[i];
>>> +        if (!tmp) {
>>> +            if (free_slot < 0)
>>> +                free_slot = i;
>>> +            continue;
>>
>> Just break here.
> 
> The logic here is to reuse context slots. Even if slot 0 is empty, I need to 
> use slot 1 if slot 1's context is the same as fence->context.

Good point, but slot 0 should never be empty. See we fill the slots starting 
from 0 and then incrementing you either have NULL or a slot which doesn't match.

> This way, we're guaranteed to find a slot for all contexts used by the driver.
> 
>>
>>> +        }
>>> +        if (fence->context != tmp->context)
>>> +            continue;
>>> +        if (dma_fence_is_later(fence, tmp)) {
>>> +            dma_fence_put(tmp);
>>> +            free_slot = i;
>>> +            break;
>>> +        }
>>> +        goto unlock;
>>> +    }
>>> +    if (free_slot >= 0) {
>>
>> Drop free_slot and check i here.
>>
>>> +        from->pipelined_eviction.fences[free_slot] = dma_fence_get(fence);
>>> +    } else {
>>> +        WARN(1, "not enough fence slots for all fence contexts");
>>> +        spin_unlock(&from->pipelined_eviction.lock);
>>> +        dma_fence_wait(fence, false);
>>> +        goto end;
>>>       }
>>> -    spin_unlock(&from->move_lock);
>>>   +unlock:
>>> +    spin_unlock(&from->pipelined_eviction.lock);
>>> +end:
>>>       ttm_resource_free(bo, &bo->resource);
>>>   }
>>>   diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
>>> b/drivers/gpu/drm/ttm/ttm_resource.c
>>> index e2c82ad07eb4..ae0d4621cc55 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>>> @@ -523,14 +523,19 @@ void ttm_resource_manager_init(struct 
>>> ttm_resource_manager *man,
>>>   {
>>>       unsigned i;
>>>   -    spin_lock_init(&man->move_lock);
>>>       man->bdev = bdev;
>>>       man->size = size;
>>>       man->usage = 0;
>>>         for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>>>           INIT_LIST_HEAD(&man->lru[i]);
>>> -    man->move = NULL;
>>> +    spin_lock_init(&man->pipelined_eviction.lock);
>>> +    for (i = 0; i < TTM_FENCES_MAX_SLOT_COUNT; i++)
>>> +        man->pipelined_eviction.fences[i] = NULL;
>>> +    /* Can be overridden by drivers that wants to use more than 1 entity
>>> +     * for moves and evictions (limited to TTM_FENCES_MAX_SLOT_COUNT).
>>> +     */
>>> +    man->pipelined_eviction.n_fences = 1;
>>>   }
>>>   EXPORT_SYMBOL(ttm_resource_manager_init);
>>>   @@ -551,7 +556,7 @@ int ttm_resource_manager_evict_all(struct ttm_device 
>>> *bdev,
>>>           .no_wait_gpu = false,
>>>       };
>>>       struct dma_fence *fence;
>>> -    int ret;
>>> +    int ret, i;
>>>         do {
>>>           ret = ttm_bo_evict_first(bdev, man, &ctx);
>>> @@ -561,18 +566,32 @@ int ttm_resource_manager_evict_all(struct ttm_device 
>>> *bdev,
>>>       if (ret && ret != -ENOENT)
>>>           return ret;
>>>   -    spin_lock(&man->move_lock);
>>> -    fence = dma_fence_get(man->move);
>>> -    spin_unlock(&man->move_lock);
>>> +    ret = 0;
>>>   -    if (fence) {
>>> -        ret = dma_fence_wait(fence, false);
>>> -        dma_fence_put(fence);
>>> -        if (ret)
>>> -            return ret;
>>> -    }
>>> +    do {
>>> +        fence = NULL;
>>>   -    return 0;
>>> +        spin_lock(&man->pipelined_eviction.lock);
>>> +        for (i = 0; i < man->pipelined_eviction.n_fences; i++) {
>>> +            fence = man->pipelined_eviction.fences[i];
>>
>>> +            man->pipelined_eviction.fences[i] = NULL;
>>
>> Drop that. We should never set man->pipelined_eviction.fences to NULL.
> 
> Why?

To simplify the logic while filling the slots.

dma_fences are made to be keept around for long.

>>
>>> +
>>>   /**
>>>    * enum ttm_lru_item_type - enumerate ttm_lru_item subclasses
>>>    */
>>> @@ -180,8 +189,10 @@ struct ttm_resource_manager_func {
>>>    * @size: Size of the managed region.
>>>    * @bdev: ttm device this manager belongs to
>>>    * @func: structure pointer implementing the range manager. See above
>>> - * @move_lock: lock for move fence
>>> - * @move: The fence of the last pipelined move operation.
>>> + * @pipelined_eviction.lock: lock for eviction fences
>>> + * @pipelined_eviction.n_fences: The number of fences allowed in the 
>>> array. If
>>> + * 0, pipelined evictions aren't used.
>>> + * @pipelined_eviction.fences: The fences of the last pipelined move 
>>> operation.
>>>    * @lru: The lru list for this memory type.
>>>    *
>>>    * This structure is used to identify and manage memory types for a 
>>> device.
>>> @@ -195,12 +206,15 @@ struct ttm_resource_manager {
>>>       struct ttm_device *bdev;
>>>       uint64_t size;
>>>       const struct ttm_resource_manager_func *func;
>>> -    spinlock_t move_lock;
>>>   -    /*
>>> -     * Protected by @move_lock.
>>> +    /* This is very similar to a dma_resv object, but locking rules make
>>> +     * it difficult to use a it in this context.
>>>        */
>>> -    struct dma_fence *move;
>>> +    struct {
>>> +        spinlock_t lock;
>>> +        int n_fences;
>>> +        struct dma_fence *fences[TTM_FENCES_MAX_SLOT_COUNT];
>>> +    } pipelined_eviction;
>>
>> Drop the separate structure, just make move an array instead.
> 
> IMO pipelined_eviction.fences and pipelined_eviction.lock is clearer when 
> reading the code than moves and move_lock but if you prefer I'll switch back 
> to the old names.

The name "pipelined_eviction" is just a bit to long. Maybe just eviction_fences 
and eviction_fences_lock?

Regards,
Christian.
>>
>> And also drop n_fences. Just always take a look at all fences.
> 
> OK.
> 
> Thanks,
> Pierre-Eric
> 
>>
>> Regards,
>> Christian.
>>
>>>         /*
>>>        * Protected by the bdev->lru_lock.
>>> @@ -421,8 +435,12 @@ static inline bool ttm_resource_manager_used(struct 
>>> ttm_resource_manager *man)
>>>   static inline void
>>>   ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
>>>   {
>>> -    dma_fence_put(man->move);
>>> -    man->move = NULL;
>>> +    int i;
>>> +
>>> +    for (i = 0; i < TTM_FENCES_MAX_SLOT_COUNT; i++) {
>>> +        dma_fence_put(man->pipelined_eviction.fences[i]);
>>> +        man->pipelined_eviction.fences[i] = NULL;
>>> +    }
>>>   }
>>>     void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);

Re: [PATCH v1 02/20] drm/ttm: rework pipelined eviction fence handling

Reply via email to