Hi, Christian,

On Mon, 2025-06-16 at 15:07 +0200, Christian König wrote:
> Instead of keeping a separate reference count for the TTM object also
> use
> the reference count for DRM GEM objects inside TTM.
> 
> Apart from avoiding two reference counts for one object this approach
> has
> the clear advantage of being able to use drm_exec inside TTM.

A couple of questions on the design direction here:

IIRC both xe and i915 have checks that treat objects with a zero GEM
refcount as zombies, requiring special treatment or skipping when
encountered in TTM callbacks. We need to double-check that.

But I wonder: first, this practice of resurrecting refcounts seems a
bit unusual. Can we get rid of it somehow?

Furthermore, it seems the problem with drm_exec is related only to the
LRU walk. What about adding a struct completion to the object that is
signaled when the object has freed its final backing store? The LRU
walk would then check whether the object is a zombie and, if so, just
wait on the struct completion. (We'd of course need to carefully set
up the locking orders.) Then we wouldn't need to resurrect the gem
refcount, nor use drm_exec locking for zombies.
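
Something like this, as a rough and untested sketch (the
free_completion name, where it lives and where it gets signaled are
all made up here):

struct ttm_buffer_object {
	...
	/* Signaled once the final backing store has been freed. */
	struct completion free_completion;
};

and in the LRU walk callbacks, instead of drm_exec-locking zombies:

	if (ttm_bo_is_zombie(bo)) {
		/* No refcount resurrection and no drm_exec here; just
		 * wait for the backing store to go away. The locking
		 * order against the lru_lock needs care, of course.
		 */
		wait_for_completion(&bo->free_completion);
		return 0;
	}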

We would still need some form of refcounting while waiting on the
struct completion, but if we restricted the TTM refcount to *only*
being used internally for that sole purpose, and also replaced the
final ttm_bo_put() with the ttm_bo_finalize() that you suggest, we
wouldn't need to resurrect that refcount, since it wouldn't drop to
zero until the object is ready for its final free.
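
Again only a sketch, assuming the ttm_bo_finalize() you suggest and
with the TTM kref kept purely internal:

void ttm_bo_finalize(struct ttm_buffer_object *bo)
{
	/* The driver drops its last reference here. Only the
	 * delayed-delete / completion machinery may still hold
	 * internal references at this point, so the kref never
	 * needs to be resurrected from zero.
	 */
	kref_put(&bo->kref, ttm_bo_release);
}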

Ideas, comments?

Thomas

> 
> Signed-off-by: Christian König <[email protected]>
> ---
>  .../gpu/drm/ttm/tests/ttm_bo_validate_test.c  |   4 +-
>  drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c |   2 -
>  drivers/gpu/drm/ttm/ttm_bo.c                  | 146 +++++++++---------
>  drivers/gpu/drm/ttm/ttm_bo_internal.h         |   9 +-
>  drivers/gpu/drm/ttm/ttm_bo_util.c             |   2 +-
>  include/drm/ttm/ttm_bo.h                      |   9 --
>  6 files changed, 81 insertions(+), 91 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> index 6766e1753343..7b908920aae5 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> @@ -127,7 +127,7 @@ static void ttm_bo_init_reserved_sys_man(struct kunit *test)
>       dma_resv_unlock(bo->base.resv);
>  
>       KUNIT_EXPECT_EQ(test, err, 0);
> -     KUNIT_EXPECT_EQ(test, kref_read(&bo->kref), 1);
> +     KUNIT_EXPECT_EQ(test, kref_read(&bo->base.refcount), 1);
>       KUNIT_EXPECT_PTR_EQ(test, bo->bdev, priv->ttm_dev);
>       KUNIT_EXPECT_EQ(test, bo->type, bo_type);
>       KUNIT_EXPECT_EQ(test, bo->page_alignment, PAGE_SIZE);
> @@ -176,7 +176,7 @@ static void ttm_bo_init_reserved_mock_man(struct kunit *test)
>       dma_resv_unlock(bo->base.resv);
>  
>       KUNIT_EXPECT_EQ(test, err, 0);
> -     KUNIT_EXPECT_EQ(test, kref_read(&bo->kref), 1);
> +     KUNIT_EXPECT_EQ(test, kref_read(&bo->base.refcount), 1);
>       KUNIT_EXPECT_PTR_EQ(test, bo->bdev, priv->ttm_dev);
>       KUNIT_EXPECT_EQ(test, bo->type, bo_type);
>       KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> index b91c13f46225..7bc8eae77b8c 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> @@ -190,8 +190,6 @@ struct ttm_buffer_object *ttm_bo_kunit_init(struct kunit *test,
>       bo->bdev = devs->ttm_dev;
>       bo->destroy = dummy_ttm_bo_destroy;
>  
> -     kref_init(&bo->kref);
> -
>       return bo;
>  }
>  EXPORT_SYMBOL_GPL(ttm_bo_kunit_init);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index abcf45bc3930..071cbad2fe9e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -48,14 +48,6 @@
>  #include "ttm_module.h"
>  #include "ttm_bo_internal.h"
>  
> -static void ttm_bo_release(struct kref *kref);
> -
> -/* TODO: remove! */
> -void ttm_bo_put(struct ttm_buffer_object *bo)
> -{
> -     kref_put(&bo->kref, ttm_bo_release);
> -}
> -
>  static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
>                                       struct ttm_placement *placement)
>  {
> @@ -252,82 +244,91 @@ static void ttm_bo_delayed_delete(struct work_struct *work)
>       ttm_bo_put(bo);
>  }
>  
> -static void ttm_bo_release(struct kref *kref)
> +static void ttm_bo_free(struct drm_gem_object *gobj)
> +{
> +     struct ttm_buffer_object *bo = container_of(gobj, typeof(*bo), base);
> +
> +     atomic_dec(&ttm_glob.bo_count);
> +     bo->destroy(bo);
> +}
> +
> +/*
> + * All other callbacks should never ever be called on a deleted TTM object.
> + */
> +static const struct drm_gem_object_funcs ttm_deleted_object_funcs = {
> +     .free = ttm_bo_free
> +};
> +
> +/* Returns true if the BO is about to get deleted */
> +static bool ttm_bo_is_zombie(struct ttm_buffer_object *bo)
> +{
> +     return bo->base.funcs == &ttm_deleted_object_funcs;
> +}
> +
> +void ttm_bo_fini(struct ttm_buffer_object *bo)
>  {
> -     struct ttm_buffer_object *bo =
> -         container_of(kref, struct ttm_buffer_object, kref);
>       struct ttm_device *bdev = bo->bdev;
>       int ret;
>  
>       WARN_ON_ONCE(bo->pin_count);
>       WARN_ON_ONCE(bo->bulk_move);
>  
> -     if (!bo->deleted) {
> -             ret = ttm_bo_individualize_resv(bo);
> -             if (ret) {
> -                     /* Last resort, if we fail to allocate memory for the
> -                      * fences block for the BO to become idle
> -                      */
> -                     dma_resv_wait_timeout(bo->base.resv,
> -                                           DMA_RESV_USAGE_BOOKKEEP, false,
> -                                           30 * HZ);
> -             }
> +     ret = ttm_bo_individualize_resv(bo);
> +     if (ret) {
> +             /* Last resort, if we fail to allocate memory for the
> +              * fences block for the BO to become idle
> +              */
> +             dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
> +                                   false, 30 * HZ);
> +     }
>  
> -             if (bo->bdev->funcs->release_notify)
> -                     bo->bdev->funcs->release_notify(bo);
> -
> -             drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
> -             ttm_mem_io_free(bdev, bo->resource);
> -
> -             if (!dma_resv_test_signaled(&bo->base._resv,
> -                                         DMA_RESV_USAGE_BOOKKEEP) ||
> -                 (want_init_on_free() && (bo->ttm != NULL)) ||
> -                 bo->type == ttm_bo_type_sg ||
> -                 !dma_resv_trylock(bo->base.resv)) {
> -                     /* The BO is not idle, resurrect it for delayed destroy */
> -                     ttm_bo_flush_all_fences(bo);
> -                     bo->deleted = true;
> -
> -                     spin_lock(&bo->bdev->lru_lock);
> -
> -                     /*
> -                      * Make pinned bos immediately available to
> -                      * shrinkers, now that they are queued for
> -                      * destruction.
> -                      *
> -                      * FIXME: QXL is triggering this. Can be removed when the
> -                      * driver is fixed.
> -                      */
> -                     if (bo->pin_count) {
> -                             bo->pin_count = 0;
> -                             ttm_resource_move_to_lru_tail(bo->resource);
> -                     }
> +     if (bo->bdev->funcs->release_notify)
> +             bo->bdev->funcs->release_notify(bo);
> +
> +     drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
> +     ttm_mem_io_free(bdev, bo->resource);
>  
> -                     kref_init(&bo->kref);
> -                     spin_unlock(&bo->bdev->lru_lock);
> +     if (!dma_resv_test_signaled(&bo->base._resv, DMA_RESV_USAGE_BOOKKEEP) ||
> +         (want_init_on_free() && (bo->ttm != NULL)) ||
> +         bo->type == ttm_bo_type_sg ||
> +         !dma_resv_trylock(bo->base.resv)) {
> +             /* The BO is not idle, resurrect it for delayed destroy */
> +             ttm_bo_flush_all_fences(bo);
>  
> -                     INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
> +             spin_lock(&bo->bdev->lru_lock);
>  
> -                     /* Schedule the worker on the closest NUMA node. This
> -                      * improves performance since system memory might be
> -                      * cleared on free and that is best done on a CPU core
> -                      * close to it.
> -                      */
> -                     queue_work_node(bdev->pool.nid, bdev->wq, &bo->delayed_delete);
> -                     return;
> +             /*
> +              * Make pinned bos immediately available to
> +              * shrinkers, now that they are queued for
> +              * destruction.
> +              *
> +              * FIXME: QXL is triggering this. Can be removed when the
> +              * driver is fixed.
> +              */
> +             if (bo->pin_count) {
> +                     bo->pin_count = 0;
> +                     ttm_resource_move_to_lru_tail(bo->resource);
>               }
>  
> +             kref_init(&bo->base.refcount);
> +             bo->base.funcs = &ttm_deleted_object_funcs;
> +             spin_unlock(&bo->bdev->lru_lock);
> +
> +             INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
> +
> +             /* Schedule the worker on the closest NUMA node. This
> +              * improves performance since system memory might be
> +              * cleared on free and that is best done on a CPU core
> +              * close to it.
> +              */
> +             queue_work_node(bdev->pool.nid, bdev->wq, &bo->delayed_delete);
> +     } else {
>               ttm_bo_cleanup_memtype_use(bo);
>               dma_resv_unlock(bo->base.resv);
> -     }
>  
> -     atomic_dec(&ttm_glob.bo_count);
> -     bo->destroy(bo);
> -}
> -
> -void ttm_bo_fini(struct ttm_buffer_object *bo)
> -{
> -     ttm_bo_put(bo);
> +             atomic_dec(&ttm_glob.bo_count);
> +             bo->destroy(bo);
> +     }
>  }
>  EXPORT_SYMBOL(ttm_bo_fini);
>  
> @@ -471,7 +472,7 @@ int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man
>       if (!bo->resource || bo->resource->mem_type != mem_type)
>               goto out_bo_moved;
>  
> -     if (bo->deleted) {
> +     if (ttm_bo_is_zombie(bo)) {
>               ret = ttm_bo_wait_ctx(bo, ctx);
>               if (!ret)
>                       ttm_bo_cleanup_memtype_use(bo);
> @@ -525,7 +526,7 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *
>       if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, evict_walk->place))
>               return 0;
>  
> -     if (bo->deleted) {
> +     if (ttm_bo_is_zombie(bo)) {
>               lret = ttm_bo_wait_ctx(bo, walk->ctx);
>               if (!lret)
>                       ttm_bo_cleanup_memtype_use(bo);
> @@ -623,7 +624,6 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
>  void ttm_bo_pin(struct ttm_buffer_object *bo)
>  {
>       dma_resv_assert_held(bo->base.resv);
> -     WARN_ON_ONCE(!kref_read(&bo->kref));
>       spin_lock(&bo->bdev->lru_lock);
>       if (bo->resource)
>               ttm_resource_del_bulk_move(bo->resource, bo);
> @@ -642,7 +642,6 @@ EXPORT_SYMBOL(ttm_bo_pin);
>  void ttm_bo_unpin(struct ttm_buffer_object *bo)
>  {
>       dma_resv_assert_held(bo->base.resv);
> -     WARN_ON_ONCE(!kref_read(&bo->kref));
>       if (WARN_ON_ONCE(!bo->pin_count))
>               return;
>  
> @@ -931,7 +930,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo,
>  {
>       int ret;
>  
> -     kref_init(&bo->kref);
>       bo->bdev = bdev;
>       bo->type = type;
>       bo->page_alignment = alignment;
> @@ -1127,7 +1125,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
>               goto out;
>       }
>  
> -     if (bo->deleted) {
> +     if (ttm_bo_is_zombie(bo)) {
>               pgoff_t num_pages = bo->ttm->num_pages;
>  
>               ret = ttm_bo_wait_ctx(bo, ctx);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_internal.h b/drivers/gpu/drm/ttm/ttm_bo_internal.h
> index e0d48eac74b0..81327bc73834 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_internal.h
> +++ b/drivers/gpu/drm/ttm/ttm_bo_internal.h
> @@ -34,7 +34,7 @@
>   */
>  static inline void ttm_bo_get(struct ttm_buffer_object *bo)
>  {
> -     kref_get(&bo->kref);
> +     drm_gem_object_get(&bo->base);
>  }
>  
>  /**
> @@ -50,11 +50,14 @@ static inline void ttm_bo_get(struct ttm_buffer_object *bo)
>  static inline __must_check struct ttm_buffer_object *
>  ttm_bo_get_unless_zero(struct ttm_buffer_object *bo)
>  {
> -     if (!kref_get_unless_zero(&bo->kref))
> +     if (!kref_get_unless_zero(&bo->base.refcount))
>               return NULL;
>       return bo;
>  }
>  
> -void ttm_bo_put(struct ttm_buffer_object *bo);
> +static inline void ttm_bo_put(struct ttm_buffer_object *bo)
> +{
> +     drm_gem_object_put(&bo->base);
> +}
>  
>  #endif
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 56f3152f34f5..56645039757e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -245,7 +245,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>       atomic_inc(&ttm_glob.bo_count);
>       drm_vma_node_reset(&fbo->base.base.vma_node);
>  
> -     kref_init(&fbo->base.kref);
> +     kref_init(&fbo->base.base.refcount);
>       fbo->base.destroy = &ttm_transfered_destroy;
>       fbo->base.pin_count = 0;
>       if (bo->type != ttm_bo_type_sg)
> diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> index 4b0552d1bc55..4fe4031f0165 100644
> --- a/include/drm/ttm/ttm_bo.h
> +++ b/include/drm/ttm/ttm_bo.h
> @@ -78,11 +78,8 @@ enum ttm_bo_type {
>   * @type: The bo type.
>   * @page_alignment: Page alignment.
>   * @destroy: Destruction function. If NULL, kfree is used.
> - * @kref: Reference count of this buffer object. When this refcount reaches
> - * zero, the object is destroyed or put on the delayed delete list.
>   * @resource: structure describing current placement.
>   * @ttm: TTM structure holding system pages.
> - * @deleted: True if the object is only a zombie and already deleted.
>   * @bulk_move: The bulk move object.
>   * @priority: Priority for LRU, BOs with lower priority are evicted first.
>   * @pin_count: Pin count.
> @@ -109,17 +106,11 @@ struct ttm_buffer_object {
>       uint32_t page_alignment;
>       void (*destroy) (struct ttm_buffer_object *);
>  
> -     /*
> -      * Members not needing protection.
> -      */
> -     struct kref kref;
> -
>       /*
>        * Members protected by the bo::resv::reserved lock.
>        */
>       struct ttm_resource *resource;
>       struct ttm_tt *ttm;
> -     bool deleted;
>       struct ttm_lru_bulk_move *bulk_move;
>       unsigned priority;
>       unsigned pin_count;
