Hi, Christian,

On Mon, 2025-06-16 at 15:07 +0200, Christian König wrote:
> Instead of keeping a separate reference count for the TTM object also
> use the reference count for DRM GEM objects inside TTM.
>
> Apart from avoiding two reference counts for one object this approach
> has the clear advantage of being able to use drm_exec inside TTM.
A couple of questions on the design direction here:

IIRC both xe and i915 have checks that treat objects with a zero GEM
refcount as zombies, requiring special treatment or skipping when
encountered in TTM callbacks. We need to double-check that. But first,
this practice of resurrecting refcounts seems a bit unusual; I wonder
if we can get rid of it somehow?

Furthermore, it seems the problem with drm_exec is related only to the
LRU walk. What about adding a struct completion to the object that is
signaled when the object has freed its final backing store? The LRU
walk would then check whether the object is a zombie and, if so, just
wait on the struct completion. (The locking orders would of course
need to be set up carefully.) Then we wouldn't need to resurrect the
GEM refcount, nor use drm_exec locking for zombies.

We would still need some form of refcounting while waiting on the
struct completion, but if we restricted the TTM refcount to *only* be
used internally for that sole purpose, and also replaced the final
ttm_bo_put() with the ttm_bo_finalize() that you suggest, we wouldn't
need to resurrect that refcount either, since it wouldn't drop to zero
until the object is ready for final free. Very roughly, I'm thinking
of something like the sketch below.
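Completely untested and hand-wavy, purely to illustrate what I mean;
ttm_bo_is_zombie() is from your patch, the field and helper names
below are invented for the sake of the example:

#include <linux/completion.h>

struct ttm_buffer_object {
        /* ... existing members ... */

        /* Signaled once the final backing store has been freed. */
        struct completion backing_freed;
};

/* In the init path, next to the other member setup: */
        init_completion(&bo->backing_freed);

/* Wherever the zombie finally releases its backing store: */
static void ttm_bo_backing_freed(struct ttm_buffer_object *bo)
{
        ttm_bo_cleanup_memtype_use(bo);
        complete_all(&bo->backing_freed);
}

/* And in the LRU walk callbacks, instead of drm_exec locking: */
        if (ttm_bo_is_zombie(bo)) {
                /*
                 * The internal TTM refcount taken by the walk keeps
                 * the object alive across the wait. The locking
                 * orders against the LRU and reservation locks would
                 * need to be worked out carefully here.
                 */
                wait_for_completion(&bo->backing_freed);
                return 0;
        }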
Ideas, comments?

Thomas

>
> Signed-off-by: Christian König <[email protected]>
> ---
>  .../gpu/drm/ttm/tests/ttm_bo_validate_test.c  |   4 +-
>  drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c |   2 -
>  drivers/gpu/drm/ttm/ttm_bo.c                  | 146 +++++++++---------
>  drivers/gpu/drm/ttm/ttm_bo_internal.h         |   9 +-
>  drivers/gpu/drm/ttm/ttm_bo_util.c             |   2 +-
>  include/drm/ttm/ttm_bo.h                      |   9 --
>  6 files changed, 81 insertions(+), 91 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> index 6766e1753343..7b908920aae5 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> @@ -127,7 +127,7 @@ static void ttm_bo_init_reserved_sys_man(struct kunit *test)
>          dma_resv_unlock(bo->base.resv);
>
>          KUNIT_EXPECT_EQ(test, err, 0);
> -        KUNIT_EXPECT_EQ(test, kref_read(&bo->kref), 1);
> +        KUNIT_EXPECT_EQ(test, kref_read(&bo->base.refcount), 1);
>          KUNIT_EXPECT_PTR_EQ(test, bo->bdev, priv->ttm_dev);
>          KUNIT_EXPECT_EQ(test, bo->type, bo_type);
>          KUNIT_EXPECT_EQ(test, bo->page_alignment, PAGE_SIZE);
> @@ -176,7 +176,7 @@ static void ttm_bo_init_reserved_mock_man(struct kunit *test)
>          dma_resv_unlock(bo->base.resv);
>
>          KUNIT_EXPECT_EQ(test, err, 0);
> -        KUNIT_EXPECT_EQ(test, kref_read(&bo->kref), 1);
> +        KUNIT_EXPECT_EQ(test, kref_read(&bo->base.refcount), 1);
>          KUNIT_EXPECT_PTR_EQ(test, bo->bdev, priv->ttm_dev);
>          KUNIT_EXPECT_EQ(test, bo->type, bo_type);
>          KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> index b91c13f46225..7bc8eae77b8c 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> @@ -190,8 +190,6 @@ struct ttm_buffer_object *ttm_bo_kunit_init(struct kunit *test,
>          bo->bdev = devs->ttm_dev;
>          bo->destroy = dummy_ttm_bo_destroy;
>
> -        kref_init(&bo->kref);
> -
>          return bo;
>  }
>  EXPORT_SYMBOL_GPL(ttm_bo_kunit_init);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index abcf45bc3930..071cbad2fe9e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -48,14 +48,6 @@
>  #include "ttm_module.h"
>  #include "ttm_bo_internal.h"
>
> -static void ttm_bo_release(struct kref *kref);
> -
> -/* TODO: remove! */
> -void ttm_bo_put(struct ttm_buffer_object *bo)
> -{
> -        kref_put(&bo->kref, ttm_bo_release);
> -}
> -
>  static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
>                                     struct ttm_placement *placement)
>  {
> @@ -252,82 +244,91 @@ static void ttm_bo_delayed_delete(struct work_struct *work)
>          ttm_bo_put(bo);
>  }
>
> -static void ttm_bo_release(struct kref *kref)
> +static void ttm_bo_free(struct drm_gem_object *gobj)
> +{
> +        struct ttm_buffer_object *bo = container_of(gobj, typeof(*bo), base);
> +
> +        atomic_dec(&ttm_glob.bo_count);
> +        bo->destroy(bo);
> +}
> +
> +/*
> + * All other callbacks should never ever be called on a deleted TTM object.
> + */
> +static const struct drm_gem_object_funcs ttm_deleted_object_funcs = {
> +        .free = ttm_bo_free
> +};
> +
> +/* Returns true if the BO is about to get deleted */
> +static bool ttm_bo_is_zombie(struct ttm_buffer_object *bo)
> +{
> +        return bo->base.funcs == &ttm_deleted_object_funcs;
> +}
> +
> +void ttm_bo_fini(struct ttm_buffer_object *bo)
>  {
> -        struct ttm_buffer_object *bo =
> -            container_of(kref, struct ttm_buffer_object, kref);
>          struct ttm_device *bdev = bo->bdev;
>          int ret;
>
>          WARN_ON_ONCE(bo->pin_count);
>          WARN_ON_ONCE(bo->bulk_move);
>
> -        if (!bo->deleted) {
> -                ret = ttm_bo_individualize_resv(bo);
> -                if (ret) {
> -                        /* Last resort, if we fail to allocate memory for the
> -                         * fences block for the BO to become idle
> -                         */
> -                        dma_resv_wait_timeout(bo->base.resv,
> -                                              DMA_RESV_USAGE_BOOKKEEP, false,
> -                                              30 * HZ);
> -                }
> +        ret = ttm_bo_individualize_resv(bo);
> +        if (ret) {
> +                /* Last resort, if we fail to allocate memory for the
> +                 * fences block for the BO to become idle
> +                 */
> +                dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
> +                                      false, 30 * HZ);
> +        }
>
> -                if (bo->bdev->funcs->release_notify)
> -                        bo->bdev->funcs->release_notify(bo);
> -
> -                drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
> -                ttm_mem_io_free(bdev, bo->resource);
> -
> -                if (!dma_resv_test_signaled(&bo->base._resv,
> -                                            DMA_RESV_USAGE_BOOKKEEP) ||
> -                    (want_init_on_free() && (bo->ttm != NULL)) ||
> -                    bo->type == ttm_bo_type_sg ||
> -                    !dma_resv_trylock(bo->base.resv)) {
> -                        /* The BO is not idle, resurrect it for delayed destroy */
> -                        ttm_bo_flush_all_fences(bo);
> -                        bo->deleted = true;
> -
> -                        spin_lock(&bo->bdev->lru_lock);
> -
> -                        /*
> -                         * Make pinned bos immediately available to
> -                         * shrinkers, now that they are queued for
> -                         * destruction.
> -                         *
> -                         * FIXME: QXL is triggering this. Can be removed when the
> -                         * driver is fixed.
> -                         */
> -                        if (bo->pin_count) {
> -                                bo->pin_count = 0;
> -                                ttm_resource_move_to_lru_tail(bo->resource);
> -                        }
> +        if (bo->bdev->funcs->release_notify)
> +                bo->bdev->funcs->release_notify(bo);
> +
> +        drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
> +        ttm_mem_io_free(bdev, bo->resource);
>
> -                        kref_init(&bo->kref);
> -                        spin_unlock(&bo->bdev->lru_lock);
> +        if (!dma_resv_test_signaled(&bo->base._resv, DMA_RESV_USAGE_BOOKKEEP) ||
> +            (want_init_on_free() && (bo->ttm != NULL)) ||
> +            bo->type == ttm_bo_type_sg ||
> +            !dma_resv_trylock(bo->base.resv)) {
> +                /* The BO is not idle, resurrect it for delayed destroy */
> +                ttm_bo_flush_all_fences(bo);
>
> -                        INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
> +                spin_lock(&bo->bdev->lru_lock);
>
> -                        /* Schedule the worker on the closest NUMA node. This
> -                         * improves performance since system memory might be
> -                         * cleared on free and that is best done on a CPU core
> -                         * close to it.
> -                         */
> -                        queue_work_node(bdev->pool.nid, bdev->wq, &bo->delayed_delete);
> -                        return;
> +                /*
> +                 * Make pinned bos immediately available to
> +                 * shrinkers, now that they are queued for
> +                 * destruction.
> +                 *
> +                 * FIXME: QXL is triggering this. Can be removed when the
> +                 * driver is fixed.
> +                 */
> +                if (bo->pin_count) {
> +                        bo->pin_count = 0;
> +                        ttm_resource_move_to_lru_tail(bo->resource);
>                  }
>
> +                kref_init(&bo->base.refcount);
> +                bo->base.funcs = &ttm_deleted_object_funcs;
> +                spin_unlock(&bo->bdev->lru_lock);
> +
> +                INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
> +
> +                /* Schedule the worker on the closest NUMA node. This
> +                 * improves performance since system memory might be
> +                 * cleared on free and that is best done on a CPU core
> +                 * close to it.
> +                 */
> +                queue_work_node(bdev->pool.nid, bdev->wq, &bo->delayed_delete);
> +        } else {
>                  ttm_bo_cleanup_memtype_use(bo);
>                  dma_resv_unlock(bo->base.resv);
> -        }
>
> -        atomic_dec(&ttm_glob.bo_count);
> -        bo->destroy(bo);
> -}
> -
> -void ttm_bo_fini(struct ttm_buffer_object *bo)
> -{
> -        ttm_bo_put(bo);
> +                atomic_dec(&ttm_glob.bo_count);
> +                bo->destroy(bo);
> +        }
>  }
>  EXPORT_SYMBOL(ttm_bo_fini);
>
> @@ -471,7 +472,7 @@ int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man
>          if (!bo->resource || bo->resource->mem_type != mem_type)
>                  goto out_bo_moved;
>
> -        if (bo->deleted) {
> +        if (ttm_bo_is_zombie(bo)) {
>                  ret = ttm_bo_wait_ctx(bo, ctx);
>                  if (!ret)
>                          ttm_bo_cleanup_memtype_use(bo);
> @@ -525,7 +526,7 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *
>          if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, evict_walk->place))
>                  return 0;
>
> -        if (bo->deleted) {
> +        if (ttm_bo_is_zombie(bo)) {
>                  lret = ttm_bo_wait_ctx(bo, walk->ctx);
>                  if (!lret)
>                          ttm_bo_cleanup_memtype_use(bo);
> @@ -623,7 +624,6 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
>  void ttm_bo_pin(struct ttm_buffer_object *bo)
>  {
>          dma_resv_assert_held(bo->base.resv);
> -        WARN_ON_ONCE(!kref_read(&bo->kref));
>          spin_lock(&bo->bdev->lru_lock);
>          if (bo->resource)
>                  ttm_resource_del_bulk_move(bo->resource, bo);
> @@ -642,7 +642,6 @@ EXPORT_SYMBOL(ttm_bo_pin);
>  void ttm_bo_unpin(struct ttm_buffer_object *bo)
>  {
>          dma_resv_assert_held(bo->base.resv);
> -        WARN_ON_ONCE(!kref_read(&bo->kref));
>          if (WARN_ON_ONCE(!bo->pin_count))
>                  return;
>
> @@ -931,7 +930,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo,
>  {
>          int ret;
>
> -        kref_init(&bo->kref);
>          bo->bdev = bdev;
>          bo->type = type;
>          bo->page_alignment = alignment;
> @@ -1127,7 +1125,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
>                  goto out;
>          }
>
> -        if (bo->deleted) {
> +        if (ttm_bo_is_zombie(bo)) {
>                  pgoff_t num_pages = bo->ttm->num_pages;
>
>                  ret = ttm_bo_wait_ctx(bo, ctx);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_internal.h b/drivers/gpu/drm/ttm/ttm_bo_internal.h
> index e0d48eac74b0..81327bc73834 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_internal.h
> +++ b/drivers/gpu/drm/ttm/ttm_bo_internal.h
> @@ -34,7 +34,7 @@
>   */
>  static inline void ttm_bo_get(struct ttm_buffer_object *bo)
>  {
> -        kref_get(&bo->kref);
> +        drm_gem_object_get(&bo->base);
>  }
>
>  /**
> @@ -50,11 +50,14 @@ static inline void ttm_bo_get(struct ttm_buffer_object *bo)
>  static inline __must_check struct ttm_buffer_object *
>  ttm_bo_get_unless_zero(struct ttm_buffer_object *bo)
>  {
> -        if (!kref_get_unless_zero(&bo->kref))
> +        if (!kref_get_unless_zero(&bo->base.refcount))
>                  return NULL;
>          return bo;
>  }
>
> -void ttm_bo_put(struct ttm_buffer_object *bo);
> +static inline void ttm_bo_put(struct ttm_buffer_object *bo)
> +{
> +        drm_gem_object_put(&bo->base);
> +}
>
>  #endif
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 56f3152f34f5..56645039757e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -245,7 +245,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>          atomic_inc(&ttm_glob.bo_count);
>          drm_vma_node_reset(&fbo->base.base.vma_node);
>
> -        kref_init(&fbo->base.kref);
> +        kref_init(&fbo->base.base.refcount);
>          fbo->base.destroy = &ttm_transfered_destroy;
>          fbo->base.pin_count = 0;
>          if (bo->type != ttm_bo_type_sg)
> diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> index 4b0552d1bc55..4fe4031f0165 100644
> --- a/include/drm/ttm/ttm_bo.h
> +++ b/include/drm/ttm/ttm_bo.h
> @@ -78,11 +78,8 @@ enum ttm_bo_type {
>   * @type: The bo type.
>   * @page_alignment: Page alignment.
>   * @destroy: Destruction function. If NULL, kfree is used.
> - * @kref: Reference count of this buffer object. When this refcount reaches
> - * zero, the object is destroyed or put on the delayed delete list.
>   * @resource: structure describing current placement.
>   * @ttm: TTM structure holding system pages.
> - * @deleted: True if the object is only a zombie and already deleted.
>   * @bulk_move: The bulk move object.
>   * @priority: Priority for LRU, BOs with lower priority are evicted first.
>   * @pin_count: Pin count.
> @@ -109,17 +106,11 @@ struct ttm_buffer_object {
>          uint32_t page_alignment;
>          void (*destroy) (struct ttm_buffer_object *);
>
> -        /*
> -         * Members not needing protection.
> -         */
> -        struct kref kref;
> -
>          /*
>           * Members protected by the bo::resv::reserved lock.
>           */
>          struct ttm_resource *resource;
>          struct ttm_tt *ttm;
> -        bool deleted;
>          struct ttm_lru_bulk_move *bulk_move;
>          unsigned priority;
>          unsigned pin_count;
