The TTM LRU-walk cherry-pick series

2024-07-07 Thread Thomas Hellström
Christian, Matthew,

I think I addressed all review comments and a couple of anticipated
ones (s/long/s64/) in the swapout- and eviction patches.

I'm heading off on vacation today (4 weeks), so if something becomes
urgent in the meantime, feel free to pick up, modify and merge.

Regarding the drm_exec trylock functionality, I'm in favor of it
looking as much as possible like any other locking-primitive trylock,
i.e. no additional tricks needed.
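
Sketched in code, what I mean (drm_exec_trylock_obj() is a hypothetical
name and signature mirroring other trylock primitives, not an existing
drm_exec function):

	/* Hypothetical: succeed or fail immediately, no slow path and
	 * no prelocking side effects. */
	if (drm_exec_trylock_obj(&exec, obj)) {
		/* obj is locked and tracked by the exec transaction. */
	} else {
		/* Contended: the caller decides, e.g. skip this object. */
	}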

Thanks,
Thomas




[PATCH v7 8/8] drm/ttm: Balance ttm_resource_cursor_init() and ttm_resource_cursor_fini()

2024-07-05 Thread Thomas Hellström
Make the interface more symmetric by providing and using a
ttm_resource_cursor_init().
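
For illustration, the resulting usage pattern (a minimal sketch based
on the hunks below; the caller holds the LRU lock):

	struct ttm_resource_cursor cursor;
	struct ttm_resource *res;

	spin_lock(&bdev->lru_lock);
	ttm_resource_cursor_init(&cursor, man);
	ttm_resource_manager_for_each_res(&cursor, res) {
		/* Inspect or act on each resource. */
	}
	ttm_resource_cursor_fini(&cursor);
	spin_unlock(&bdev->lru_lock);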

Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo.c   |  3 ++-
 drivers/gpu/drm/ttm/ttm_bo_util.c  |  4 +++-
 drivers/gpu/drm/ttm/ttm_resource.c | 34 +-
 include/drm/ttm/ttm_resource.h | 12 ++-
 4 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 0131ec802066..7fcd9cb0478e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -449,7 +449,8 @@ int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man
	int ret = 0;

	spin_lock(&bdev->lru_lock);
-	res = ttm_resource_manager_first(man, &cursor);
+	ttm_resource_cursor_init(&cursor, man);
+	res = ttm_resource_manager_first(&cursor);
	ttm_resource_cursor_fini(&cursor);
if (!res) {
ret = -ENOENT;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 3c07f4712d5c..ec6a0482cd94 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -865,7 +865,8 @@ s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
	s64 lret;

	spin_lock(&bdev->lru_lock);
-	ttm_resource_manager_for_each_res(man, &cursor, res) {
+	ttm_resource_cursor_init(&cursor, man);
+	ttm_resource_manager_for_each_res(&cursor, res) {
struct ttm_buffer_object *bo = res->bo;
bool bo_needs_unlock = false;
bool bo_locked = false;
@@ -906,6 +907,7 @@ s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
 
ttm_lru_walk_unlock(bo, bo_needs_unlock);
ttm_bo_put(bo);
+
if (lret == -EBUSY || lret == -EALREADY)
lret = 0;
progress = (lret < 0) ? lret : progress + lret;
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 6d764ba88aab..b300d615e196 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -80,6 +80,23 @@ static void ttm_bulk_move_drop_cursors(struct ttm_lru_bulk_move *bulk)
ttm_resource_cursor_clear_bulk(cursor);
 }
 
+/**
+ * ttm_resource_cursor_init() - Initialize a struct ttm_resource_cursor
+ * @cursor: The cursor to initialize.
+ * @man: The resource manager.
+ *
+ * Initialize the cursor before using it for iteration.
+ */
+void ttm_resource_cursor_init(struct ttm_resource_cursor *cursor,
+ struct ttm_resource_manager *man)
+{
+   cursor->priority = 0;
+   cursor->man = man;
+	ttm_lru_item_init(&cursor->hitch, TTM_LRU_HITCH);
+	INIT_LIST_HEAD(&cursor->bulk_link);
+	INIT_LIST_HEAD(&cursor->hitch.link);
+}
+
 /**
  * ttm_resource_cursor_fini() - Finalize the LRU list cursor usage
  * @cursor: The struct ttm_resource_cursor to finalize.
@@ -586,17 +603,16 @@ ttm_resource_cursor_check_bulk(struct ttm_resource_cursor *cursor,
  * Return: The first resource from the resource manager.
  */
 struct ttm_resource *
-ttm_resource_manager_first(struct ttm_resource_manager *man,
-  struct ttm_resource_cursor *cursor)
+ttm_resource_manager_first(struct ttm_resource_cursor *cursor)
 {
-	lockdep_assert_held(&man->bdev->lru_lock);
+	struct ttm_resource_manager *man = cursor->man;

-	cursor->priority = 0;
-	cursor->man = man;
-	ttm_lru_item_init(&cursor->hitch, TTM_LRU_HITCH);
-	INIT_LIST_HEAD(&cursor->bulk_link);
-	list_add(&cursor->hitch.link, &man->lru[cursor->priority]);
+	if (WARN_ON_ONCE(!man))
+		return NULL;
+
+	lockdep_assert_held(&man->bdev->lru_lock);

+	list_move(&cursor->hitch.link, &man->lru[cursor->priority]);
return ttm_resource_manager_next(cursor);
 }
 
@@ -632,8 +648,6 @@ ttm_resource_manager_next(struct ttm_resource_cursor *cursor)
ttm_resource_cursor_clear_bulk(cursor);
}
 
-   ttm_resource_cursor_fini(cursor);
-
return NULL;
 }
 
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index be034be56ba1..ee0e652328b3 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -325,6 +326,9 @@ struct ttm_resource_cursor {
unsigned int priority;
 };
 
+void ttm_resource_cursor_init(struct ttm_resource_cursor *cursor,
+ struct ttm_resource_manager *man);
+
 void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor);
 
 /**
@@ -456,8 +460,7 @@ void ttm_resource_manager_debug(struct ttm_resource_manager *man,
struct drm_printer *p);
 
 struct ttm_resource *
-ttm_resource_manager_first(struct ttm_resource_manager *man,
-			   struct ttm_resource_cursor *cursor);

[PATCH v7 7/8] drm/ttm: Use the LRU walker for eviction

2024-07-05 Thread Thomas Hellström
Use the LRU walker for eviction. This helps remove
a lot of code with weird locking semantics.

The functionality is slightly changed so that
when trylocked buffer objects are exhausted, we
continue to interleave walks with ticket-locks while
there is still progress made. The list walks are
not restarted in-between evictions.

Also provide a separate ttm_bo_evict_first()
function for its single user. The context of that
user allows sleeping dma_resv locks.
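
For illustration, a minimal sketch of the new single-eviction helper
(signature and -ENOENT semantics as in the hunks below):

	/*
	 * Evict the first evictable bo on @man's LRU list; may sleep on
	 * the dma_resv lock. -ENOENT means no resource was found.
	 */
	ret = ttm_bo_evict_first(bdev, man, ctx);
	if (ret && ret != -ENOENT)
		return ret;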

v6:
- Various cleanups suggested by Matthew Brost.
- Fix error return code of ttm_bo_evict_first(). (Matthew Brost)
- Fix an error check that was inverted. (Matthew Brost)
v7:
- Use s64 rather than long (Christian König)
- Early ttm_resource_cursor_fini() in ttm_bo_evict_first().
- Simplify check for bo_moved in ttm_bo_evict_first().
  (Christian König)
- Don't evict pinned bos.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost  #v6
---
 drivers/gpu/drm/ttm/ttm_bo.c   | 345 -
 drivers/gpu/drm/ttm/ttm_resource.c |  21 +-
 include/drm/ttm/ttm_bo.h   |   8 +-
 3 files changed, 143 insertions(+), 231 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f4b2b2bea6cb..0131ec802066 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -224,80 +224,6 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
	dma_resv_iter_end(&cursor);
 }
 
-/**
- * ttm_bo_cleanup_refs
- * If bo idle, remove from lru lists, and unref.
- * If not idle, block if possible.
- *
- * Must be called with lru_lock and reservation held, this function
- * will drop the lru lock and optionally the reservation lock before returning.
- *
- * @bo: The buffer object to clean-up
- * @interruptible: Any sleeps should occur interruptibly.
- * @no_wait_gpu:   Never wait for gpu. Return -EBUSY instead.
- * @unlock_resv:   Unlock the reservation lock as well.
- */
-
-static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
-  bool interruptible, bool no_wait_gpu,
-  bool unlock_resv)
-{
-	struct dma_resv *resv = &bo->base._resv;
-   int ret;
-
-   if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP))
-   ret = 0;
-   else
-   ret = -EBUSY;
-
-   if (ret && !no_wait_gpu) {
-   long lret;
-
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-	spin_unlock(&bo->bdev->lru_lock);
-
-   lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
-interruptible,
-30 * HZ);
-
-   if (lret < 0)
-   return lret;
-   else if (lret == 0)
-   return -EBUSY;
-
-	spin_lock(&bo->bdev->lru_lock);
-	if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
-		/*
-		 * We raced, and lost, someone else holds the reservation now,
-		 * and is probably busy in ttm_bo_cleanup_memtype_use.
-		 *
-		 * Even if it's not the case, because we finished waiting any
-		 * delayed destruction would succeed, so just return success
-		 * here.
-		 */
-		spin_unlock(&bo->bdev->lru_lock);
-		return 0;
-	}
-   ret = 0;
-   }
-
-   if (ret) {
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-		spin_unlock(&bo->bdev->lru_lock);
-   return ret;
-   }
-
-	spin_unlock(&bo->bdev->lru_lock);
-   ttm_bo_cleanup_memtype_use(bo);
-
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-
-   return 0;
-}
-
 /*
  * Block for the dma_resv object to become idle, lock the buffer and clean up
  * the resource and tt object.
@@ -505,151 +431,152 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
-/*
- * Check the target bo is allowable to be evicted or swapout, including cases:
- *
- * a. if share same reservation object with ctx->resv, have assumption
- * reservation objects should already be locked, so not lock again and
- * return true directly when either the operation allow_reserved_eviction
- * or the target bo already is in delayed free list;
+/**
+ * ttm_bo_evict_first() - Evict the first bo on the manager's LRU list.
+ * @bdev: The ttm device.
+ * @man: The manager whose bo to evict.
+ * @ctx: The TTM operation ctx governing the eviction.
  *
- * b. Otherwise, trylock it.
+ * Return: 0 if successful or the resource disappeared. Negative error code on error.

[PATCH v7 6/8] drm/ttm: Use the LRU walker helper for swapping

2024-07-05 Thread Thomas Hellström
Rework the TTM swapping to use the LRU walker helper.
This helps fix up the ttm_bo_swapout() interface
to be consistent about not requiring any locking.

For now mimic the current behaviour of using trylock
only. We could be using ticket-locks here but defer
that until it's deemed necessary. The TTM swapout
functionality is a bit weird anyway since it
alternates between memory types without exhausting
TTM_PL_SYSTEM first.

Intentionally keep pages as the unit of progress since
changing that to bytes is an unrelated change that can
be done later.
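
The shape of the conversion, as a sketch (struct and callback names
from the diff below; how the ttm_bo_swapout_cb() callback is wired
into struct ttm_lru_walk follows the walker patch and is elided here):

	struct ttm_bo_swapout_walk swapout_walk = {
		/* Trylock-only as described above; no ww ticket. */
		.walk = { .ctx = ctx },
		.gfp_flags = gfp_flags,
	};
	s64 lret;

	/* Progress is counted in pages swapped out. */
	lret = ttm_lru_walk_for_evict(&swapout_walk.walk, bdev, man, target);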

v6:
- Improve on error code translation in the swapout callback
  (Matthew Brost).
v7:
- Use s64 rather than long.
- Remove ttm_resource_cursor_fini() since it's no longer used.
- Rename ttm_resource_cursor_fini_locked() to
  ttm_resource_cursor_fini().
- Don't swap out pinned bos.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost  #v6
---
 drivers/gpu/drm/ttm/ttm_bo.c   | 116 +++--
 drivers/gpu/drm/ttm/ttm_bo_util.c  |   2 +-
 drivers/gpu/drm/ttm/ttm_device.c   |  30 ++--
 drivers/gpu/drm/ttm/ttm_resource.c |  23 +-
 include/drm/ttm/ttm_bo.h   |   5 +-
 include/drm/ttm/ttm_resource.h |   2 -
 6 files changed, 89 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 43eda720657f..f4b2b2bea6cb 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -621,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
if (locked)
dma_resv_unlock(res->bo->base.resv);
}
-	ttm_resource_cursor_fini_locked(&cursor);
+	ttm_resource_cursor_fini(&cursor);
 
if (!bo) {
if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
@@ -1118,12 +1118,24 @@ int ttm_bo_wait_ctx(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx)
 }
 EXPORT_SYMBOL(ttm_bo_wait_ctx);
 
-int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
-  gfp_t gfp_flags)
+/**
+ * struct ttm_bo_swapout_walk - Parameters for the swapout walk
+ */
+struct ttm_bo_swapout_walk {
+   /** @walk: The walk base parameters. */
+   struct ttm_lru_walk walk;
+   /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */
+   gfp_t gfp_flags;
+};
+
+static s64
+ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
 {
-   struct ttm_place place;
-   bool locked;
-   long ret;
+   struct ttm_place place = {.mem_type = bo->resource->mem_type};
+   struct ttm_bo_swapout_walk *swapout_walk =
+   container_of(walk, typeof(*swapout_walk), walk);
+   struct ttm_operation_ctx *ctx = walk->ctx;
+   s64 ret;
 
/*
 * While the bo may already reside in SYSTEM placement, set
@@ -1131,28 +1143,29 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 * The driver may use the fact that we're moving from SYSTEM
 * as an indication that we're about to swap out.
 */
-	memset(&place, 0, sizeof(place));
-	place.mem_type = bo->resource->mem_type;
-	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL))
-		return -EBUSY;
+	if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, &place)) {
+   ret = -EBUSY;
+   goto out;
+   }
 
if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) ||
bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL ||
-   bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED ||
-   !ttm_bo_get_unless_zero(bo)) {
-   if (locked)
-   dma_resv_unlock(bo->base.resv);
-   return -EBUSY;
+   bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED) {
+   ret = -EBUSY;
+   goto out;
}
 
if (bo->deleted) {
-   ret = ttm_bo_cleanup_refs(bo, false, false, locked);
-   ttm_bo_put(bo);
-   return ret == -EBUSY ? -ENOSPC : ret;
-   }
+   pgoff_t num_pages = bo->ttm->num_pages;
 
-   /* TODO: Cleanup the locking */
-	spin_unlock(&bo->bdev->lru_lock);
+   ret = ttm_bo_wait_ctx(bo, ctx);
+   if (ret)
+   goto out;
+
+   ttm_bo_cleanup_memtype_use(bo);
+   ret = num_pages;
+   goto out;
+   }
 
/*
 * Move to system cached
@@ -1164,12 +1177,13 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
	memset(&hop, 0, sizeof(hop));
	place.mem_type = TTM_PL_SYSTEM;
	ret = ttm_resource_alloc(bo, &place, &evict_mem);
-   if (unlikely(ret))
+   if (ret)
goto out;
 

[PATCH v7 2/8] drm/ttm: Slightly clean up LRU list iteration

2024-07-05 Thread Thomas Hellström
To make the transition to using lru hitches easier,
simplify the ttm_resource_manager_next() interface to only take
the cursor and reuse ttm_resource_manager_next() functionality
from ttm_resource_manager_first().
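
The resulting iteration sketch (the cursor now carries all state;
@man and @res are no longer passed to _next()):

	struct ttm_resource_cursor cursor;
	struct ttm_resource *res;

	for (res = ttm_resource_manager_first(man, &cursor); res;
	     res = ttm_resource_manager_next(&cursor)) {
		/* The cursor, not @res, records the position. */
	}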

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_resource.c | 48 +-
 include/drm/ttm/ttm_resource.h | 10 ---
 2 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index db9a7a3717c4..8bfbc0e8 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -496,50 +496,44 @@ struct ttm_resource *
 ttm_resource_manager_first(struct ttm_resource_manager *man,
   struct ttm_resource_cursor *cursor)
 {
-   struct ttm_lru_item *lru;
-
	lockdep_assert_held(&man->bdev->lru_lock);
 
-	for (cursor->priority = 0; cursor->priority < TTM_MAX_BO_PRIORITY;
-	     ++cursor->priority)
-		list_for_each_entry(lru, &man->lru[cursor->priority], link) {
-			if (ttm_lru_item_is_res(lru))
-				return ttm_lru_item_to_res(lru);
-		}
-
-	return NULL;
+	cursor->priority = 0;
+	cursor->man = man;
+	cursor->cur = &man->lru[cursor->priority];
+   return ttm_resource_manager_next(cursor);
 }
 
 /**
  * ttm_resource_manager_next
  *
- * @man: resource manager to iterate over
  * @cursor: cursor to record the position
- * @res: the current resource pointer
  *
- * Returns the next resource from the resource manager.
+ * Return: the next resource from the resource manager.
  */
 struct ttm_resource *
-ttm_resource_manager_next(struct ttm_resource_manager *man,
- struct ttm_resource_cursor *cursor,
- struct ttm_resource *res)
+ttm_resource_manager_next(struct ttm_resource_cursor *cursor)
 {
-	struct ttm_lru_item *lru = &res->lru;
+   struct ttm_resource_manager *man = cursor->man;
+   struct ttm_lru_item *lru;
 
	lockdep_assert_held(&man->bdev->lru_lock);
 
-	list_for_each_entry_continue(lru, &man->lru[cursor->priority], link) {
-		if (ttm_lru_item_is_res(lru))
-			return ttm_lru_item_to_res(lru);
-	}
-
-	for (++cursor->priority; cursor->priority < TTM_MAX_BO_PRIORITY;
-	     ++cursor->priority)
-		list_for_each_entry(lru, &man->lru[cursor->priority], link) {
-			if (ttm_lru_item_is_res(lru))
-				return ttm_lru_item_to_res(lru);
+	for (;;) {
+		lru = list_entry(cursor->cur, typeof(*lru), link);
+		list_for_each_entry_continue(lru, &man->lru[cursor->priority], link) {
+			if (ttm_lru_item_is_res(lru)) {
+				cursor->cur = &lru->link;
+				return ttm_lru_item_to_res(lru);
+			}
		}

+		if (++cursor->priority >= TTM_MAX_BO_PRIORITY)
+			break;
+
+		cursor->cur = &man->lru[cursor->priority];
+   }
+
return NULL;
 }
 
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 1511d91e290d..7d81fd5b5b83 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -272,11 +272,15 @@ ttm_lru_item_to_res(struct ttm_lru_item *item)
 /**
  * struct ttm_resource_cursor
  *
+ * @man: The resource manager currently being iterated over.
+ * @cur: The list head the cursor currently points to.
  * @priority: the current priority
  *
  * Cursor to iterate over the resources in a manager.
  */
 struct ttm_resource_cursor {
+   struct ttm_resource_manager *man;
+   struct list_head *cur;
unsigned int priority;
 };
 
@@ -438,9 +442,7 @@ struct ttm_resource *
 ttm_resource_manager_first(struct ttm_resource_manager *man,
   struct ttm_resource_cursor *cursor);
 struct ttm_resource *
-ttm_resource_manager_next(struct ttm_resource_manager *man,
- struct ttm_resource_cursor *cursor,
- struct ttm_resource *res);
+ttm_resource_manager_next(struct ttm_resource_cursor *cursor);
 
 struct ttm_resource *
 ttm_lru_first_res_or_null(struct list_head *head);
@@ -455,7 +457,7 @@ ttm_lru_first_res_or_null(struct list_head *head);
  */
 #define ttm_resource_manager_for_each_res(man, cursor, res)\
for (res = ttm_resource_manager_first(man, cursor); res;\
-res = ttm_resource_manager_next(man, cursor, res))
+res = ttm_resource_manager_next(cursor))
 
 struct ttm_kmap_iter *
 ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io,
-- 
2.44.0



[PATCH v7 5/8] drm/ttm: Provide a generic LRU walker helper

2024-07-05 Thread Thomas Hellström
Provide a generic LRU walker in TTM, in the spirit of drm_gem_lru_scan()
but building on the restartable TTM LRU functionality.

The LRU walker optionally supports locking objects as part of
a ww mutex locking transaction, to mimic to some extent the
current functionality in ttm. However any -EDEADLK return
is converted to -ENOSPC and then to -ENOMEM before reaching
the driver, so that the driver will need to backoff and possibly retry
without being able to keep the ticket.
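
A sketch of the driver-side backoff this implies
(unwind_ww_transaction() is a hypothetical placeholder for whatever
the driver uses to drop its ww locks):

	bool retried = false;

retry:
	ret = ttm_bo_validate(bo, placement, ctx); /* may evict via the walker */
	if (ret == -ENOMEM && !retried) {
		/*
		 * A ww contention (-EDEADLK) inside the walker surfaces as
		 * -ENOSPC / -ENOMEM. The ticket cannot be kept, so back off
		 * the whole transaction and retry.
		 */
		unwind_ww_transaction(); /* hypothetical */
		retried = true;
		goto retry;
	}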

v3:
- Move the helper to core ttm.
- Remove the drm_exec usage from it for now, it will be
  reintroduced later in the series.
v4:
- Handle the -EALREADY case if ticketlocking.
v6:
- Some cleanup and added code comments (Matthew Brost)
- Clarified the ticketlock in the commit message (Matthew Brost)
v7:
- Use s64 rather than long for the target and progress
  (Christian König)
- Update documentation to not encourage using pages as a
  progress measure. (Christian König)
- Remove cond_resched(). (Christian König)

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost  #v6
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 151 ++
 include/drm/ttm/ttm_bo.h  |  35 +++
 2 files changed, 186 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 0b3f4267130c..c2759add58f5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -768,3 +768,154 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
ttm_tt_destroy(bo->bdev, ttm);
return ret;
 }
+
+static bool ttm_lru_walk_trylock(struct ttm_lru_walk *walk,
+struct ttm_buffer_object *bo,
+bool *needs_unlock)
+{
+   struct ttm_operation_ctx *ctx = walk->ctx;
+
+   *needs_unlock = false;
+
+   if (dma_resv_trylock(bo->base.resv)) {
+   *needs_unlock = true;
+   return true;
+   }
+
+   if (bo->base.resv == ctx->resv && ctx->allow_res_evict) {
+   dma_resv_assert_held(bo->base.resv);
+   return true;
+   }
+
+   return false;
+}
+
+static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
+  struct ttm_buffer_object *bo,
+  bool *needs_unlock)
+{
+   struct dma_resv *resv = bo->base.resv;
+   int ret;
+
+   if (walk->ctx->interruptible)
+   ret = dma_resv_lock_interruptible(resv, walk->ticket);
+   else
+   ret = dma_resv_lock(resv, walk->ticket);
+
+   if (!ret) {
+   *needs_unlock = true;
+   /*
+* Only a single ticketlock per loop. Ticketlocks are prone
+* to return -EDEADLK causing the eviction to fail, so
+* after waiting for the ticketlock, revert back to
+* trylocking for this walk.
+*/
+   walk->ticket = NULL;
+   } else if (ret == -EDEADLK) {
+   /* Caller needs to exit the ww transaction. */
+   ret = -ENOSPC;
+   }
+
+   return ret;
+}
+
+static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
+{
+   if (locked)
+   dma_resv_unlock(bo->base.resv);
+}
+
+/**
+ * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
+ * valid items.
+ * @walk: describe the walks and actions taken
+ * @bdev: The TTM device.
+ * @man: The struct ttm_resource manager whose LRU lists we're walking.
+ * @target: The end condition for the walk.
+ *
+ * The LRU lists of @man are walked, and for each struct ttm_resource encountered,
+ * the corresponding ttm_buffer_object is locked and taken a reference on, and
+ * the LRU lock is dropped. The LRU lock may be dropped before locking and, in
+ * that case, it's verified that the item actually remains on the LRU list after
+ * the lock, and that the buffer object didn't switch resource in between.
+ *
+ * With a locked object, the actions indicated by @walk->process_bo are
+ * performed, and after that, the bo is unlocked, the refcount dropped and the
+ * next struct ttm_resource is processed. Here, the walker relies on
+ * TTM's restartable LRU list implementation.
+ *
+ * Typically @walk->process_bo() would return the number of pages evicted,
+ * swapped or shrunken, so that when the total exceeds @target, or when the
+ * LRU list has been walked in full, iteration is terminated. It's also
+ * terminated on error. Note that the definition of @target is done by the
+ * caller, it could have a different meaning than the number of pages.
+ *
+ * Note that the way dma_resv individualization is done, locking needs to be
+ * done either with the LRU lock held (trylocking only) or with a reference on
+ * the object.
+ *
+ * Return: The progress made towards target or negative error code on error.

[PATCH v7 3/8] drm/ttm: Use LRU hitches

2024-07-05 Thread Thomas Hellström
Have iterators insert themselves into the list they are iterating
over using hitch list nodes. Since only the iterator owner
can remove these list nodes from the list, it's safe to unlock
the list and when continuing, use them as a starting point. Due to
the way LRU bumping works in TTM, newly added items will not be
missed, and bumped items will be iterated over a second time before
reaching the end of the list.

The exception is list with bulk move sublists. When bumping a
sublist, a hitch that is part of that sublist will also be moved
and we might miss items if restarting from it. This will be
addressed in a later patch.
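
In code, the property this buys us (a sketch using the interfaces as
of this patch):

	spin_lock(&bdev->lru_lock);
	res = ttm_resource_manager_first(man, &cursor); /* hitch inserted */
	spin_unlock(&bdev->lru_lock); /* safe: only we can remove the hitch */

	/* ... sleep, evict, swap out ... */

	spin_lock(&bdev->lru_lock);
	res = ttm_resource_manager_next(&cursor); /* resumes after the hitch */
	ttm_resource_cursor_fini_locked(&cursor);
	spin_unlock(&bdev->lru_lock);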

Changes in previous series:
- Updated ttm_resource_cursor_fini() documentation.
v2:
- Don't reorder ttm_resource_manager_first() and _next().
  (Christian König).
- Use list_add instead of list_move
  (Christian König)
v3:
- Split into two patches, one cleanup, one new functionality
  (Christian König)
- use ttm_resource_cursor_fini_locked() instead of open-coding
  (Matthew Brost)

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c   |  1 +
 drivers/gpu/drm/ttm/ttm_device.c   |  9 +++--
 drivers/gpu/drm/ttm/ttm_resource.c | 56 +-
 include/drm/ttm/ttm_resource.h |  9 +++--
 4 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6396dece0db1..43eda720657f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -621,6 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
if (locked)
dma_resv_unlock(res->bo->base.resv);
}
+	ttm_resource_cursor_fini_locked(&cursor);
 
if (!bo) {
if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 09411978a13a..f9e9b1ec8c8a 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -170,12 +170,17 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
num_pages = PFN_UP(bo->base.size);
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
-   if (!ret)
+   if (!ret) {
+				ttm_resource_cursor_fini(&cursor);
return num_pages;
-   if (ret != -EBUSY)
+   }
+   if (ret != -EBUSY) {
+				ttm_resource_cursor_fini(&cursor);
return ret;
+   }
}
}
+	ttm_resource_cursor_fini_locked(&cursor);
	spin_unlock(&bdev->lru_lock);
return 0;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 8bfbc0e8..9c8b6499edfb 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -33,6 +33,37 @@
 
 #include 
 
+/**
+ * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage
+ * @cursor: The struct ttm_resource_cursor to finalize.
+ *
+ * The function pulls the LRU list cursor off any lists it was previously
+ * attached to. Needs to be called with the LRU lock held. The function
+ * can be called multiple times after each other.
+ */
+void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor)
+{
+	lockdep_assert_held(&cursor->man->bdev->lru_lock);
+	list_del_init(&cursor->hitch.link);
+}
+
+/**
+ * ttm_resource_cursor_fini() - Finalize the LRU list cursor usage
+ * @cursor: The struct ttm_resource_cursor to finalize.
+ *
+ * The function pulls the LRU list cursor off any lists it was previously
+ * attached to. Needs to be called without the LRU list lock held. The
+ * function can be called multiple times after each other.
+ */
+void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor)
+{
+	spinlock_t *lru_lock = &cursor->man->bdev->lru_lock;
+
+   spin_lock(lru_lock);
+   ttm_resource_cursor_fini_locked(cursor);
+   spin_unlock(lru_lock);
+}
+
 /**
  * ttm_lru_bulk_move_init - initialize a bulk move structure
  * @bulk: the structure to init
@@ -485,12 +516,15 @@ void ttm_resource_manager_debug(struct ttm_resource_manager *man,
 EXPORT_SYMBOL(ttm_resource_manager_debug);
 
 /**
- * ttm_resource_manager_first
- *
+ * ttm_resource_manager_first() - Start iterating over the resources
+ * of a resource manager
  * @man: resource manager to iterate over
  * @cursor: cursor to record the position
  *
- * Returns the first resource from the resource manager.
+ * Initializes the cursor and starts iterating. When done iterating,
+ * the caller must explicitly call ttm_resource_cursor_fini().

[PATCH v7 4/8] drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves

2024-07-05 Thread Thomas Hellström
To address the problem with hitches moving when bulk move
sublists are lru-bumped, register the list cursors with the
ttm_lru_bulk_move structure when traversing its list, and
when lru-bumping the list, move the cursor hitch to the tail.
This also means it's mandatory for drivers to call
ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when
initializing and finalizing the bulk move structure, so add
those calls to the amdgpu- and xe driver.

Compared to v1 this is slightly more code but less fragile
and hopefully easier to understand.
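
Driver-side, the new requirement is just the bracketing calls
(a sketch mirroring the amdgpu hunks below):

	/* At VM / bulk-move owner init: */
	ttm_lru_bulk_move_init(&vm->lru_bulk_move);

	/* ... */

	/* At fini, once no bos remain in the bulk move: */
	ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);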

Changes in previous series:
- Completely rework the functionality
- Avoid a NULL pointer dereference assigning manager->mem_type
- Remove some leftover code causing build problems
v2:
- For hitch bulk tail moves, store the mem_type in the cursor
  instead of with the manager.
v3:
- Remove leftover mem_type member from change in v2.
v6:
- Add some lockdep asserts (Matthew Brost)
- Avoid NULL pointer dereference (Matthew Brost)
- No need to check bo->resource before dereferencing
  bo->bulk_move (Matthew Brost)

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
Acked-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  4 ++
 drivers/gpu/drm/ttm/ttm_resource.c | 92 ++
 drivers/gpu/drm/xe/xe_vm.c |  4 ++
 include/drm/ttm/ttm_resource.h | 56 ++--
 4 files changed, 135 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3abfa66d72a2..97743993d711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2420,6 +2420,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r)
return r;
 
+	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
+
vm->is_compute_context = false;
 
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@@ -2484,6 +2486,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 error_free_delayed:
dma_fence_put(vm->last_tlb_flush);
dma_fence_put(vm->last_unlocked);
+	ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
amdgpu_vm_fini_entities(vm);
 
return r;
@@ -2640,6 +2643,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
}
 
+	ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
 }
 
 /**
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 9c8b6499edfb..b6a2daac5518 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -33,6 +33,53 @@
 
 #include 
 
+/* Detach the cursor from the bulk move list */
+static void
+ttm_resource_cursor_clear_bulk(struct ttm_resource_cursor *cursor)
+{
+	lockdep_assert_held(&cursor->man->bdev->lru_lock);
+
+	cursor->bulk = NULL;
+	list_del_init(&cursor->bulk_link);
+}
+
+/* Move the cursor to the end of the bulk move list it's in */
+static void ttm_resource_cursor_move_bulk_tail(struct ttm_lru_bulk_move *bulk,
+					       struct ttm_resource_cursor *cursor)
+{
+	struct ttm_lru_bulk_move_pos *pos;
+
+	lockdep_assert_held(&cursor->man->bdev->lru_lock);
+
+	if (WARN_ON_ONCE(bulk != cursor->bulk)) {
+		list_del_init(&cursor->bulk_link);
+		return;
+	}
+
+	pos = &bulk->pos[cursor->mem_type][cursor->priority];
+	if (pos->last)
+		list_move(&cursor->hitch.link, &pos->last->lru.link);
+	ttm_resource_cursor_clear_bulk(cursor);
+}
+
+/* Move all cursors attached to a bulk move to its end */
+static void ttm_bulk_move_adjust_cursors(struct ttm_lru_bulk_move *bulk)
+{
+   struct ttm_resource_cursor *cursor, *next;
+
+	list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link)
+   ttm_resource_cursor_move_bulk_tail(bulk, cursor);
+}
+
+/* Remove a cursor from an empty bulk move list */
+static void ttm_bulk_move_drop_cursors(struct ttm_lru_bulk_move *bulk)
+{
+   struct ttm_resource_cursor *cursor, *next;
+
+	list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link)
+   ttm_resource_cursor_clear_bulk(cursor);
+}
+
 /**
  * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage
  * @cursor: The struct ttm_resource_cursor to finalize.
@@ -45,6 +92,7 @@ void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor)
 {
	lockdep_assert_held(&cursor->man->bdev->lru_lock);
	list_del_init(&cursor->hitch.link);
+   ttm_resource_cursor_clear_bulk(cursor);
 }
 
 /**
@@ -73,9 +121,27 @@ void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor)
 void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk)
 {
  

[PATCH v7 1/8] drm/ttm: Allow TTM LRU list nodes of different types

2024-07-05 Thread Thomas Hellström
To be able to handle list unlocking while traversing the LRU
list, we want the iterators not only to point to the next
position of the list traversal, but to insert themselves as
list nodes at that point to work around the fact that the
next node might otherwise disappear from the list while
the iterator is pointing to it.

These list nodes need to be easily distinguishable from other
list nodes so that others traversing the list can skip
over them.

So declare a struct ttm_lru_item, with a struct list_head member
and a type enum. This will slightly increase the size of a
struct ttm_resource.
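
Roughly, the new node and the skip pattern look like this (a sketch;
TTM_LRU_RESOURCE is an assumed name for the non-hitch type, and the
hitch type is introduced for the iterators later in the series):

	enum ttm_lru_item_type {
		TTM_LRU_RESOURCE,	/* assumed name */
		TTM_LRU_HITCH,
	};

	struct ttm_lru_item {
		struct list_head link;
		enum ttm_lru_item_type type;
	};

	/* Traversers skip non-resource nodes: */
	if (ttm_lru_item_is_res(lru))
		res = ttm_lru_item_to_res(lru);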

Changes in previous series:
- Update enum ttm_lru_item_type documentation.
v3:
- Introduce ttm_lru_first_res_or_null()
  (Christian König, Thomas Hellström)
v5:
- Update also the TTM test code (Xe CI).

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/tests/ttm_bo_test.c   |  6 +-
 drivers/gpu/drm/ttm/tests/ttm_resource_test.c |  2 +-
 drivers/gpu/drm/ttm/ttm_device.c  |  4 +-
 drivers/gpu/drm/ttm/ttm_resource.c| 89 +++
 include/drm/ttm/ttm_resource.h| 54 ++-
 5 files changed, 129 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
index d1b32303d051..f0a7eb62116c 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
@@ -271,7 +271,7 @@ static void ttm_bo_unreserve_basic(struct kunit *test)
 
man = ttm_manager_type(priv->ttm_dev, mem_type);
KUNIT_ASSERT_EQ(test,
-			list_is_last(&res1->lru, &man->lru[bo->priority]), 1);
+			list_is_last(&res1->lru.link, &man->lru[bo->priority]), 1);
 
	ttm_resource_free(bo, &res1);
	ttm_resource_free(bo, &res2);
@@ -308,11 +308,11 @@ static void ttm_bo_unreserve_pinned(struct kunit *test)
	err = ttm_resource_alloc(bo, place, &res1);
KUNIT_ASSERT_EQ(test, err, 0);
KUNIT_ASSERT_EQ(test,
-			list_is_last(&res1->lru, &priv->ttm_dev->pinned), 1);
+			list_is_last(&res1->lru.link, &priv->ttm_dev->pinned), 1);
 
ttm_bo_unreserve(bo);
KUNIT_ASSERT_EQ(test,
-			list_is_last(&res1->lru, &priv->ttm_dev->pinned), 1);
+			list_is_last(&res1->lru.link, &priv->ttm_dev->pinned), 1);
 
	ttm_resource_free(bo, &res1);
	ttm_resource_free(bo, &res2);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
index 9c2f13e53162..22260e7aea58 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
@@ -198,7 +198,7 @@ static void ttm_resource_fini_basic(struct kunit *test)
ttm_resource_init(bo, place, res);
ttm_resource_fini(man, res);
 
-	KUNIT_ASSERT_TRUE(test, list_empty(&res->lru));
+	KUNIT_ASSERT_TRUE(test, list_empty(&res->lru.link));
KUNIT_ASSERT_EQ(test, man->usage, 0);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 434cf0258000..09411978a13a 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -274,14 +274,14 @@ static void ttm_device_clear_lru_dma_mappings(struct ttm_device *bdev,
struct ttm_resource *res;
 
	spin_lock(&bdev->lru_lock);
-   while ((res = list_first_entry_or_null(list, typeof(*res), lru))) {
+   while ((res = ttm_lru_first_res_or_null(list))) {
struct ttm_buffer_object *bo = res->bo;
 
/* Take ref against racing releases once lru_lock is unlocked */
if (!ttm_bo_get_unless_zero(bo))
continue;
 
-		list_del_init(&res->lru);
+		list_del_init(&bo->resource->lru.link);
		spin_unlock(&bdev->lru_lock);
 
if (bo->ttm)
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 4a66b851b67d..db9a7a3717c4 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -70,8 +70,8 @@ void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
dma_resv_assert_held(pos->last->bo->base.resv);
 
man = ttm_manager_type(pos->first->bo->bdev, i);
-			list_bulk_move_tail(&man->lru[j], &pos->first->lru,
-					    &pos->last->lru);
+			list_bulk_move_tail(&man->lru[j], &pos->first->lru.link,
+					    &pos->last->lru.link);
}
}
 }
@@ -84,14 +84,38 @@ ttm_lru_bulk_move_pos(struct ttm_lru_bulk_move *bulk, struct ttm_resource *res)
	return &bulk->pos[res->mem_type][res->bo->priority];

[PATCH v7 0/8] TTM LRU-walk cherry-picks

2024-07-05 Thread Thomas Hellström
These are cherry-picks from the xe shrinker series here:

https://patchwork.freedesktop.org/series/131815/

extracted to speed up review progress and inclusion.

The series provides a restartable LRU walk and makes it possible to
resume the walk after dropping the LRU lock to evict or swap out.

Patches 1-4 implement restartable LRU list iteration.

Patch 5 implements an LRU walker + resv locking helper.

Patch 6 moves TTM swapping over to the walker.

Patch 7 moves TTM eviction over to the walker.

Patch 8 balances the struct ttm_resource_cursor interface.

v2:
- Squash obsolete revision history in the patch commit messages.
- Fix a couple of review comments by Christian
- Don't store the mem_type in the TTM managers but in the
  resource cursor.
- Rename introduced TTM *back_up* function names to *backup*
- Add ttm pool recovery fault injection.
- Shrinker xe kunit test
- Various bugfixes

v3:
- Address some review comments from Matthew Brost and Christian König.
- Use the restartable LRU walk for TTM swapping and eviction.
- Provide a POC drm_exec locking implementation for exhaustive
  eviction. (Christian König).

v4:
- Remove the RFC exhaustive eviction part. While the path to exhaustive
  eviction is pretty clear and demonstrated in v3, there is still some
  drm_exec work that needs to be agreed and implemented.
- Add shrinker power management. On some hw we need to wake when shrinking.
- Fix the lru walker helper for -EALREADY errors.
- Add drm/xe: Increase the XE_PL_TT watermark.

v5:
- Update also TTM kunit tests
- Handle ghost- and zombie objects in the shrinker.
- A couple of compile- and UAF fixes reported by Kernel Build Robot and
  Dan Carpenter.

v6:
- Address review comments from Matthew Brost as detailed in patches
  4/12, 5/12, 6/12, 7/12, 8/12.

v7:
- Drop previous patches 8-12 for now and concentrate on 1-7
- Add a new patch 8 to balance the ttm_resource_cursor interface
  (Christian König)
- Fix various style comments from Christian König in patch 5-7.
- Update Reviewed-by: and Acked tags.

Cc: Somalapuram Amaranath 
Cc: Christian König 
Cc: Matthew Brost 
Cc: 

Thomas Hellström (8):
  drm/ttm: Allow TTM LRU list nodes of different types
  drm/ttm: Slightly clean up LRU list iteration
  drm/ttm: Use LRU hitches
  drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist
moves
  drm/ttm: Provide a generic LRU walker helper
  drm/ttm: Use the LRU walker helper for swapping
  drm/ttm: Use the LRU walker for eviction
  drm/ttm: Balance ttm_resource_cursor_init() and
ttm_resource_cursor_fini()

 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   4 +
 drivers/gpu/drm/ttm/tests/ttm_bo_test.c   |   6 +-
 drivers/gpu/drm/ttm/tests/ttm_resource_test.c |   2 +-
 drivers/gpu/drm/ttm/ttm_bo.c  | 461 --
 drivers/gpu/drm/ttm/ttm_bo_util.c | 153 ++
 drivers/gpu/drm/ttm/ttm_device.c  |  29 +-
 drivers/gpu/drm/ttm/ttm_resource.c| 269 +++---
 drivers/gpu/drm/xe/xe_vm.c|   4 +
 include/drm/ttm/ttm_bo.h  |  48 +-
 include/drm/ttm/ttm_resource.h| 109 -
 10 files changed, 732 insertions(+), 353 deletions(-)

-- 
2.44.0



Re: [PATCH 2/5] drm/exec: don't immediately add the prelocked obj

2024-07-05 Thread Thomas Hellström
On Fri, 2024-07-05 at 14:41 +0200, Christian König wrote:
> Am 03.07.24 um 17:51 schrieb Thomas Hellström:
> > On Wed, 2024-07-03 at 15:25 +0200, Christian König wrote:
> > > Some contended objects might never be locked again in the case of
> > > eviction
> > > handling for example.
> > > 
> > > Make sure that those don't show up in the list of locked objects
> > > until they are explicitly mentioned.
> > Could you be a bit more specific in the commit message about in
> > what
> > situations that is bad?
> 
> The prelocked object is not necessarily expected to be in the list of
> locked objects.
> 
> I ran into issues because amdgpu tried to validate all locked objects
> and so tried to also validate the prelocked one (which was only
> locked 
> for eviction).
> 
> That obviously didn't make much sense.

Indeed. Could you add a similar description to the commit message?

/Thomas



> 
> Regards,
> Christian.
> 
> > 
> > /Thomas
> > 
> > 
> > 
> > > Signed-off-by: Christian König 
> > > ---
> > >   drivers/gpu/drm/drm_exec.c | 18 +-
> > >   1 file changed, 9 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/drm_exec.c
> > > b/drivers/gpu/drm/drm_exec.c
> > > index 2da094bdf8a4..220df336fbd9 100644
> > > --- a/drivers/gpu/drm/drm_exec.c
> > > +++ b/drivers/gpu/drm/drm_exec.c
> > > @@ -61,8 +61,11 @@ static void drm_exec_unlock_all(struct
> > > drm_exec
> > > *exec)
> > >   drm_gem_object_put(obj);
> > >   }
> > >   
> > > - drm_gem_object_put(exec->prelocked);
> > > - exec->prelocked = NULL;
> > > + if (exec->prelocked) {
> > > + dma_resv_unlock(exec->prelocked->resv);
> > > + drm_gem_object_put(exec->prelocked);
> > > + exec->prelocked = NULL;
> > > + }
> > >   }
> > >   
> > >   /**
> > > @@ -179,16 +182,9 @@ static int drm_exec_lock_contended(struct
> > > drm_exec *exec)
> > >   dma_resv_lock_slow(obj->resv, &exec->ticket);
> > >   }
> > >   
> > > - ret = drm_exec_obj_locked(exec, obj);
> > > - if (unlikely(ret))
> > > - goto error_unlock;
> > > -
> > >   exec->prelocked = obj;
> > >   return 0;
> > >   
> > > -error_unlock:
> > > - dma_resv_unlock(obj->resv);
> > > -
> > >   error_dropref:
> > >   drm_gem_object_put(obj);
> > >   return ret;
> > > @@ -214,6 +210,10 @@ int drm_exec_lock_obj(struct drm_exec *exec,
> > > struct drm_gem_object *obj)
> > >   return ret;
> > >   
> > >   if (exec->prelocked == obj) {
> > > + ret = drm_exec_obj_locked(exec, obj);
> > > + if (unlikely(ret))
> > > + return ret;
> > > +
> > >   drm_gem_object_put(exec->prelocked);
> > >   exec->prelocked = NULL;
> > >   return 0;
> 



[PATCH v3] drm/xe: Use write-back caching mode for system memory on DGFX

2024-07-05 Thread Thomas Hellström
The caching mode for buffer objects with VRAM as a possible
placement was forced to write-combined, regardless of placement.

However, write-combined system memory is expensive to allocate and
even though it is pooled, the pool is expensive to shrink, since
it involves global CPU TLB flushes.

Moreover write-combined system memory from TTM is only reliably
available on x86 and DGFX doesn't have an x86 restriction.

So regardless of the cpu caching mode selected for a bo,
internally use write-back caching mode for system memory on DGFX.

Coherency is maintained, but user-space clients may perceive a
difference in cpu access speeds.
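
In short, the behaviour after this patch (a summary drawn from the
hunk below):

	/*
	 * DGFX: system memory is always ttm_cached (WB), regardless of
	 *       bo->cpu_caching (XE_BO_FLAG_NEEDS_UC handling unchanged).
	 * iGFX: unchanged; bo->cpu_caching, scanout and Xe_LPG+ page
	 *       tables may still force ttm_write_combined.
	 */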

v2:
- Update RB- and Ack tags.
- Rephrase wording in xe_drm.h (Matt Roper)
v3:
- Really rephrase wording.

Signed-off-by: Thomas Hellström 
Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode")
Cc: Pallavi Mishra 
Cc: Matthew Auld 
Cc: dri-devel@lists.freedesktop.org
Cc: Joonas Lahtinen 
Cc: Effie Yu 
Cc: Matthew Brost 
Cc: Maarten Lankhorst 
Cc: Jose Souza 
Cc: Michal Mrozek 
Cc:  # v6.8+
Acked-by: Matthew Auld 
Acked-by: José Roberto de Souza 
Reviewed-by: Rodrigo Vivi 
Acked-by: Michal Mrozek 
Acked-by: Effie Yu  #On chat
---
 drivers/gpu/drm/xe/xe_bo.c   | 47 +++-
 drivers/gpu/drm/xe/xe_bo_types.h |  3 +-
 include/uapi/drm/xe_drm.h|  8 +-
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 65c696966e96..31192d983d9e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -343,7 +343,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
struct xe_device *xe = xe_bo_device(bo);
struct xe_ttm_tt *tt;
unsigned long extra_pages;
-   enum ttm_caching caching;
+   enum ttm_caching caching = ttm_cached;
int err;
 
tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -357,26 +357,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
   PAGE_SIZE);
 
-   switch (bo->cpu_caching) {
-   case DRM_XE_GEM_CPU_CACHING_WC:
-   caching = ttm_write_combined;
-   break;
-   default:
-   caching = ttm_cached;
-   break;
-   }
-
-   WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
-
/*
-* Display scanout is always non-coherent with the CPU cache.
-*
-* For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
-* require a CPU:WC mapping.
+* DGFX system memory is always WB / ttm_cached, since
+* other caching modes are only supported on x86. DGFX
+* GPU system memory accesses are always coherent with the
+* CPU.
 */
-	if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
-	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE))
-		caching = ttm_write_combined;
+   if (!IS_DGFX(xe)) {
+   switch (bo->cpu_caching) {
+   case DRM_XE_GEM_CPU_CACHING_WC:
+   caching = ttm_write_combined;
+   break;
+   default:
+   caching = ttm_cached;
+   break;
+   }
+
+   WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
+
+   /*
+* Display scanout is always non-coherent with the CPU cache.
+*
+* For Xe_LPG and beyond, PPGTT PTE lookups are also
+* non-coherent and require a CPU:WC mapping.
+*/
+   if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
+   (xe->info.graphics_verx100 >= 1270 &&
+bo->flags & XE_BO_FLAG_PAGETABLE))
+   caching = ttm_write_combined;
+   }
 
if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
/*
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 02d68873558a..ebc8abf7930a 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -68,7 +68,8 @@ struct xe_bo {
 
/**
 * @cpu_caching: CPU caching mode. Currently only used for userspace
-* objects.
+* objects. Exceptions are system memory on DGFX, which is always
+* WB.
 */
u16 cpu_caching;
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 33544ef78d3e..19619d4952a8 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -783,7 +783,13 @@ struct drm_xe_gem_

[PATCH v2] drm/xe: Use write-back caching mode for system memory on DGFX

2024-07-05 Thread Thomas Hellström
The caching mode for buffer objects with VRAM as a possible
placement was forced to write-combined, regardless of placement.

However, write-combined system memory is expensive to allocate and
even though it is pooled, the pool is expensive to shrink, since
it involves global CPU TLB flushes.

Moreover write-combined system memory from TTM is only reliably
available on x86 and DGFX doesn't have an x86 restriction.

So regardless of the cpu caching mode selected for a bo,
internally use write-back caching mode for system memory on DGFX.

Coherency is maintained, but user-space clients may perceive a
difference in cpu access speeds.

v2:
- Update RB- and Ack tags.
- Rephrase wording in xe_drm.h (Matt Roper)

Signed-off-by: Thomas Hellström 
Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode")
Cc: Pallavi Mishra 
Cc: Matthew Auld 
Cc: dri-devel@lists.freedesktop.org
Cc: Joonas Lahtinen 
Cc: Effie Yu 
Cc: Matthew Brost 
Cc: Maarten Lankhorst 
Cc: Jose Souza 
Cc: Michal Mrozek 
Cc:  # v6.8+
Acked-by: Matthew Auld 
Acked-by: José Roberto de Souza 
Reviewed-by: Rodrigo Vivi 
Acked-by: Michal Mrozek 
Acked-by: Effie Yu  #On chat
---
 drivers/gpu/drm/xe/xe_bo.c   | 47 +++-
 drivers/gpu/drm/xe/xe_bo_types.h |  3 +-
 include/uapi/drm/xe_drm.h|  8 +-
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 65c696966e96..31192d983d9e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -343,7 +343,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
struct xe_device *xe = xe_bo_device(bo);
struct xe_ttm_tt *tt;
unsigned long extra_pages;
-   enum ttm_caching caching;
+   enum ttm_caching caching = ttm_cached;
int err;
 
tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -357,26 +357,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
   PAGE_SIZE);
 
-   switch (bo->cpu_caching) {
-   case DRM_XE_GEM_CPU_CACHING_WC:
-   caching = ttm_write_combined;
-   break;
-   default:
-   caching = ttm_cached;
-   break;
-   }
-
-   WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
-
/*
-* Display scanout is always non-coherent with the CPU cache.
-*
-* For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
-* require a CPU:WC mapping.
+* DGFX system memory is always WB / ttm_cached, since
+* other caching modes are only supported on x86. DGFX
+* GPU system memory accesses are always coherent with the
+* CPU.
 */
-	if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
-	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE))
-		caching = ttm_write_combined;
-   caching = ttm_write_combined;
+   if (!IS_DGFX(xe)) {
+   switch (bo->cpu_caching) {
+   case DRM_XE_GEM_CPU_CACHING_WC:
+   caching = ttm_write_combined;
+   break;
+   default:
+   caching = ttm_cached;
+   break;
+   }
+
+   WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
+
+   /*
+* Display scanout is always non-coherent with the CPU cache.
+*
+* For Xe_LPG and beyond, PPGTT PTE lookups are also
+* non-coherent and require a CPU:WC mapping.
+*/
+   if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
+   (xe->info.graphics_verx100 >= 1270 &&
+bo->flags & XE_BO_FLAG_PAGETABLE))
+   caching = ttm_write_combined;
+   }
 
if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
/*
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 02d68873558a..ebc8abf7930a 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -68,7 +68,8 @@ struct xe_bo {
 
/**
 * @cpu_caching: CPU caching mode. Currently only used for userspace
-* objects.
+* objects. Exceptions are system memory on DGFX, which is always
+* WB.
 */
u16 cpu_caching;
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 33544ef78d3e..83474125f3db 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -783,7 +783,13 @@ struct drm_xe_gem_create {
 #define DRM_XE_GEM_CPU_C

Re: [PATCH v6 04/12] drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves

2024-07-04 Thread Thomas Hellström
On Thu, 2024-07-04 at 15:13 +0200, Christian König wrote:
> Hey Thomas,
> 
> Am 04.07.24 um 14:41 schrieb Thomas Hellström:
> > Hi, Christian,
> > 
> > On Thu, 2024-07-04 at 11:21 +0200, Christian König wrote:
> > > Am 03.07.24 um 17:38 schrieb Thomas Hellström:
> > > > To address the problem with hitches moving when bulk move
> > > > sublists are lru-bumped, register the list cursors with the
> > > > ttm_lru_bulk_move structure when traversing its list, and
> > > > when lru-bumping the list, move the cursor hitch to the tail.
> > > > This also means it's mandatory for drivers to call
> > > > ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when
> > > > initializing and finalizing the bulk move structure, so add
> > > > those calls to the amdgpu- and xe driver.
> > > > 
> > > > Compared to v1 this is slightly more code but less fragile
> > > > and hopefully easier to understand.
> > > This is the only patch in the series which I see critical.
> > > 
> > > I think the final goal when using drm_exec in TTMs eviction path
> > > is
> > > to
> > > keep all evicted (or evicting) BOs locked until we have enough
> > > space.
> > > 
> > > This means that for bulk move sections on the LRU we would lock
> > > the
> > > first BO and would only drop that lock again if we have gone over
> > > the
> > > full bulk move section and know that all BOs are not valuable for
> > > eviction.
> > > 
> > > Because of this the issue of having to consider hitches move with
> > > a
> > > bulk
> > > move section on the LRU doesn't even occur because for that a
> > > concurrent
> > > process would need to grab the common lock of the BOs in the bulk
> > > move
> > > section.
> > While I agree that this is something we should strive towards,
> > following the previous discussion I already reworked this patch
> > completely to remove the dual hitches and make it less fragile.
> 
> Yeah seen that and it indeed makes it much easier to understand
> what's 
> going on.
> 
> > After that you mentioned you were ok with the high level approach
> > for
> > these first four patches here:
> > 
> > https://lists.freedesktop.org/archives/dri-devel/2024-April/450288.html
> > 
> > So is that not any longer the case?
> 
> I'm ok with having it as intermediate step, but for that it's a bit
> much 
> of an hammer.
> 
> On the other hand having clean ttm_lru_bulk_move_init() and 
> ttm_lru_bulk_move_fini() calls is probably something we should keep 
> around anyway.
> 
> > To recap, the concerns I'm seeing with the "kept common lock"
> > approach
> > are
> > 
> > a) Since when we release the LRU lock and the common bulk bo lock
> > is
> > not yet locked, a LRU bump may happen and the hitch will go with
> > it. So
> > to avoid that we need to place the hitch *before* the considered
> > resource in the LRU list rather than *after*. Now on the next
> > iteration
> > we need to come up with some way to choose what's really the next
> > resource? If the next resource pointer is the same we already
> > considered, should we assume it might have been freed and re-
> > alloced
> > with the same virtual address?
> 
> My idea is for the general flow is this:
> 
> 1. Grab the lru lock
> 2. Grab a reference to the BO after the hitch, eventually trylock the
> BO 
> or just continue with a prelocked one
> 3. If locking wasn't successfully
>  4. Drop the lru lock
>  5. Block on the BO lock
>  6. Check that this resource/BO is still the one the cursor
> points 
> to, if not drop the lock and restart from #1
>  7. Grab the lru lock
> 8. Advance the cursor.
> 9. Drop the lru lock.
> 10. Try to evict or swap the BO
> 11. Repeat if still not able to allocate memory.
> 
> The BO could be prelocked if it's part of the currently processed
> bulk 
> or previously contended and prelocked by drm_exec.
> 
> And instead of checking if the resource is in the right domain we
> check 
> if the resource/BO is still the one where the cursor points to.
> 
> This way we don't care if the resource was reallocated and by
> coincidence
> ended up right after the cursor hitch again. As long as we still
> point 
> to the BO we just locked everything is fine.
> 
> > b) It will be up to the user of the lru traversal to actually
> > guarantee
> > that locks are held across a bulk part, to make the resource
> > traversal reasonably self-contained.

Re: [PATCH v6 04/12] drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves

2024-07-04 Thread Thomas Hellström
Hi, Christian,

On Thu, 2024-07-04 at 11:21 +0200, Christian König wrote:
> Am 03.07.24 um 17:38 schrieb Thomas Hellström:
> > To address the problem with hitches moving when bulk move
> > sublists are lru-bumped, register the list cursors with the
> > ttm_lru_bulk_move structure when traversing its list, and
> > when lru-bumping the list, move the cursor hitch to the tail.
> > This also means it's mandatory for drivers to call
> > ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when
> > initializing and finalizing the bulk move structure, so add
> > those calls to the amdgpu- and xe driver.
> > 
> > Compared to v1 this is slightly more code but less fragile
> > and hopefully easier to understand.
> 
> This is the only patch in the series which I see critical.
> 
> I think the final goal when using drm_exec in TTMs eviction path is
> to 
> keep all evicted (or evicting) BOs locked until we have enough space.
> 
> This means that for bulk move sections on the LRU we would lock the 
> first BO and would only drop that lock again if we have gone over the
> full bulk move section and know that all BOs are not valuable for
> eviction.
> 
> Because of this the issue of having to consider hitches move with a
> bulk 
> move section on the LRU doesn't even occur because for that a
> concurrent 
> process would need to grab the common lock of the BOs in the bulk
> move 
> section.

While I agree that this is something we should strive towards,
following the previous discussion I already reworked this patch
completely to remove the dual hitches and make it less fragile. 
After that you mentioned you were ok with the high level approach for
these first four patches here:

https://lists.freedesktop.org/archives/dri-devel/2024-April/450288.html

So is that not any longer the case?

To recap, the concerns I'm seeing with the "kept common lock" approach
are

a) Since when we release the LRU lock and the common bulk bo lock is
not yet locked, a LRU bump may happen and the hitch will go with it. So
to avoid that we need to place the hitch *before* the considered
resource in the LRU list rather than *after*. Now on the next iteration
we need to come up with some way to choose what's really the next
resource? If the next resource pointer is the same we already
considered, should we assume it might have been freed and re-alloced
with the same virtual address? 

b) It will be up to the user of the lru traversal to actually guarantee
that locks are held across a bulk part, to make the resource traversal
reasonably self-contained. In this case that user is the LRU walker,
because that's where the bo locking happens.
This means that any other code that aims to walk the LRUs for various
reasons, and doesn't provide any held-lock guarantees, may be subject
to unexpected results if someone bumped the LRU.
So we would basically tailor the resource iteration here to a single
use-case instead of making it robust for various use-cases.

So my suggestion is we keep this until we've come up with a bullet-
proof way to sort out a) and b) above and then we can rip it out.
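
To illustrate a) with code: with the hitch placed *before* the
considered resource, the restart check degenerates to a pointer
comparison. Sketch only, the helper below is hypothetical:

	static bool cursor_next_unchanged(struct ttm_resource_cursor *cursor,
					  struct ttm_lru_item *prev)
	{
		struct ttm_lru_item *next =
			list_next_entry(&cursor->hitch, link);

		/*
		 * If @prev was freed and a new resource was allocated at
		 * the same address and added right after the hitch, this
		 * returns true although it's a different resource - a
		 * classic ABA problem.
		 */
		return next == prev;
	}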

/Thomas

> 
> Regards,
> Christian.
> 
> 
> > 
> > Changes in previous series:
> > - Completely rework the functionality
> > - Avoid a NULL pointer dereference assigning manager->mem_type
> > - Remove some leftover code causing build problems
> > v2:
> > - For hitch bulk tail moves, store the mem_type in the cursor
> >    instead of with the manager.
> > v3:
> > - Remove leftover mem_type member from change in v2.
> > v6:
> > - Add some lockdep asserts (Matthew Brost)
> > - Avoid NULL pointer dereference (Matthew Brost)
> > - No need to check bo->resource before dereferencing
> >    bo->bulk_move (Matthew Brost)
> > 
> > Cc: Christian König 
> > Cc: Somalapuram Amaranath 
> > Cc: Matthew Brost 
> > Cc: 
> > Signed-off-by: Thomas Hellström 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  4 ++
> >   drivers/gpu/drm/ttm/ttm_resource.c | 92
> > ++
> >   drivers/gpu/drm/xe/xe_vm.c |  4 ++
> >   include/drm/ttm/ttm_resource.h | 56 ++--
> >   4 files changed, 135 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index 3abfa66d72a2..97743993d711 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -2420,6 +2420,8 @@ int amdgpu_vm_init(struct amdgpu_device
> > *adev, struct amdgpu_vm *vm,
> >     if (r)
> >     return r;
> >   
> > +   ttm_lru_bulk_move_i

Re: [PATCH] drm/ttm: Always take the bo delayed cleanup path for imported bos

2024-07-04 Thread Thomas Hellström
On Mon, 2024-07-01 at 12:43 +0200, Christian König wrote:
> On 28.06.24 at 20:13, Daniel Vetter wrote:
> > On Fri, Jun 28, 2024 at 03:51:33PM +, Matthew Brost wrote:
> > > On Fri, Jun 28, 2024 at 05:38:48PM +0200, Thomas Hellström wrote:
> > > > Bos can be put with multiple unrelated dma-resv locks held. But
> > > > imported bos attempt to grab the bo dma-resv during dma-buf
> > > > detach
> > > > that typically happens during cleanup. That leads to lockde
> > > > splats
> > > > similar to the below and a potential ABBA deadlock.
> > > > 
> > > > Fix this by always taking the delayed workqueue cleanup path
> > > > for
> > > > imported bos.
> > > > 
> > > > Requesting stable fixes from when the Xe driver was introduced,
> > > > since its usage of drm_exec and wide vm dma_resvs appear to be
> > > > the first reliable trigger of this.
> > > > 
> > > > [22982.116427] 
> > > > [22982.116428] WARNING: possible recursive locking detected
> > > > [22982.116429] 6.10.0-rc2+ #10 Tainted: G U  W
> > > > [22982.116430] 
> > > > [22982.116430] glxgears:sh0/5785 is trying to acquire lock:
> > > > [22982.116431] 8c2bafa539a8
> > > > (reservation_ww_class_mutex){+.+.}-{3:3}, at:
> > > > dma_buf_detach+0x3b/0xf0
> > > > [22982.116438]
> > > >     but task is already holding lock:
> > > > [22982.116438] 8c2d9aba6da8
> > > > (reservation_ww_class_mutex){+.+.}-{3:3}, at:
> > > > drm_exec_lock_obj+0x49/0x2b0 [drm_exec]
> > > > [22982.116442]
> > > >     other info that might help us debug this:
> > > > [22982.116442]  Possible unsafe locking scenario:
> > > > 
> > > > [22982.116443]    CPU0
> > > > [22982.116444]    
> > > > [22982.116444]   lock(reservation_ww_class_mutex);
> > > > [22982.116445]   lock(reservation_ww_class_mutex);
> > > > [22982.116447]
> > > >  *** DEADLOCK ***
> > > > 
> > > > [22982.116447]  May be due to missing lock nesting notation
> > > > 
> > > > [22982.116448] 5 locks held by glxgears:sh0/5785:
> > > > [22982.116449]  #0: 8c2d9aba58c8 (>vm.lock){+.+.}-
> > > > {3:3}, at: xe_file_close+0xde/0x1c0 [xe]
> > > > [22982.116507]  #1: 8c2e28cc8480 (>lock){}-{3:3},
> > > > at: xe_vm_close_and_put+0x161/0x9b0 [xe]
> > > > [22982.116578]  #2: 8c2e31982970 (>lock){.+.+}-{3:3},
> > > > at: xe_validation_ctx_init+0x6d/0x70 [xe]
> > > > [22982.116647]  #3: acdc469478a8
> > > > (reservation_ww_class_acquire){+.+.}-{0:0}, at:
> > > > xe_vma_destroy_unlocked+0x7f/0xe0 [xe]
> > > > [22982.116716]  #4: 8c2d9aba6da8
> > > > (reservation_ww_class_mutex){+.+.}-{3:3}, at:
> > > > drm_exec_lock_obj+0x49/0x2b0 [drm_exec]
> > > > [22982.116719]
> > > >     stack backtrace:
> > > > [22982.116720] CPU: 8 PID: 5785 Comm: glxgears:sh0 Tainted:
> > > > G U  W  6.10.0-rc2+ #10
> > > > [22982.116721] Hardware name: ASUS System Product Name/PRIME
> > > > B560M-A AC, BIOS 2001 02/01/2023
> > > > [22982.116723] Call Trace:
> > > > [22982.116724]  
> > > > [22982.116725]  dump_stack_lvl+0x77/0xb0
> > > > [22982.116727]  __lock_acquire+0x1232/0x2160
> > > > [22982.116730]  lock_acquire+0xcb/0x2d0
> > > > [22982.116732]  ? dma_buf_detach+0x3b/0xf0
> > > > [22982.116734]  ? __lock_acquire+0x417/0x2160
> > > > [22982.116736]  __ww_mutex_lock.constprop.0+0xd0/0x13b0
> > > > [22982.116738]  ? dma_buf_detach+0x3b/0xf0
> > > > [22982.116741]  ? dma_buf_detach+0x3b/0xf0
> > > > [22982.116743]  ? ww_mutex_lock+0x2b/0x90
> > > > [22982.116745]  ww_mutex_lock+0x2b/0x90
> > > > [22982.116747]  dma_buf_detach+0x3b/0xf0
> > > > [22982.116749]  drm_prime_gem_destroy+0x2f/0x40 [drm]
> > > > [22982.116775]  xe_ttm_bo_destroy+0x32/0x220 [xe]
> > > > [22982.116818]  ? __mutex_unlock_slowpath+0x3a/0x290
> > > > [22982.116821]  drm_exec_unlock_all+0xa1/0xd0 [drm_exec]
> > > > [22982.116823]  drm_exec_fini+0x12/0xb0 [drm_exec]
> > > > [22982.116824]  xe_validation_ctx_fini+0x15/0x40 [xe]
> > &g

Re: linux-next: build failure after merge of the drm

2024-07-04 Thread Thomas Hellström
Hi

On Wed, 2024-07-03 at 13:46 +0200, Michal Wajdeczko wrote:
> + Rodrigo for help
> 
> On 03.07.2024 04:36, Stephen Rothwell wrote:
> > Hi all,
> > 
> > On Fri, 28 Jun 2024 18:03:39 +0100 Mark Brown 
> > wrote:
> > > 
> > > After merging the drm tree, today's linux-next build (x86_64
> > > allmodconfig) failed like this:
> > > 
> > > /tmp/next/build/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c: In
> > > function 'pf_get_threshold':
> > > /tmp/next/build/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c:1788:27: error: unused variable 'xe' [-Werror=unused-variable]
> > >  1788 | struct xe_device *xe = gt_to_xe(gt);
> > >   |   ^~
> > > cc1: all warnings being treated as errors
> > > 
> > > Caused by commit
> > > 
> > >   629df234bfe73d ("drm/xe/pf: Introduce functions to configure VF
> > > thresholds")
> > > 
> > > I have used the tree from 20240627 instead.
> > 
> > I am still seeing that build failure.
> > 

I see that git, for an unknown reason, introduces this line as an
automatic resolve when merging drm-next into the drm-tip build. Later
there was a manual fixup for this after merging another branch into
drm-tip, but that's too late.

So I've added a manual fixup to drm-rerere to remove this line right
after the merge that somehow introduces it.

/Thomas



Re: [PATCH 3/5] drm/exec: provide trylock interface for eviction

2024-07-03 Thread Thomas Hellström
On Wed, 2024-07-03 at 15:26 +0200, Christian König wrote:
> The TTM eviction path has some additional requirements which make it
> necessary to trylock an object and then eventually keep or drop the
> lock
> again.
> 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/drm_exec.c | 77
> ++
>  include/drm/drm_exec.h |  5 +++
>  2 files changed, 82 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
> index 220df336fbd9..b81bf5a92d97 100644
> --- a/drivers/gpu/drm/drm_exec.c
> +++ b/drivers/gpu/drm/drm_exec.c
> @@ -336,5 +336,82 @@ int drm_exec_prepare_array(struct drm_exec
> *exec,
>  }
>  EXPORT_SYMBOL(drm_exec_prepare_array);
>  
> +/**
> + * drm_exec_trylock_obj - try to lock a GEM object
> + * @exec: the drm_exec object with the state
> + * @obj: the GEM object to trylock
> + *
> + * Try to lock a GEM object but don't grab a reference yet.
> + *
> + * Since we can't handle contention here it's illegal to trylock the
> first
> + * object.
> + *
> + * This function is supposed to be used from atomic context and we
> don't
> + * know if the GEM object will actually be used or not. So we don't
> grab a
> + * reference yet.

With the pending LRU walker the *need* for atomic context here is gone.

> + *
> + * Returns: True if the object could be locked, false otherwise.
> + */
> +bool drm_exec_trylock_obj(struct drm_exec *exec, struct
> drm_gem_object *obj)
> +{
> + if (WARN_ON(!exec->num_objects))
> + return false;

I think we were in the middle of the discussion here about how to
handle this. IIRC the last suggestion was to 

if (exec->contended)
 return false;

and provide a

drm_exec_sanitize_for_trylock() function that could be used to pre-lock
the contended lock (and perhaps pre-register any memory needed so that
a lock in atomic context could be made).
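
Roughly, I'd imagine the trylock path then looking like below;
drm_exec_sanitize_for_trylock() is only the suggested name and the
whole snippet is a sketch of the idea, not a tested implementation:

	/* Outside atomic context: resolve any recorded contention first. */
	ret = drm_exec_sanitize_for_trylock(exec);
	if (ret)
		return ret;

	/* Possibly atomic context: trylocks can't handle contention. */
	if (!drm_exec_trylock_obj(exec, obj))
		return -EBUSY;	/* Back off; sanitize and retry. */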

The use-case I'm worried about moving forward is, again, bo creation,
where I think pushing the validation out will make the conversion of
buffer object creation in drivers over to drm_exec much more
complicated than it already is. Or perhaps I'm misunderstanding how
that was supposed to work.

/Thomas


> +
> + if (exec->prelocked == obj)
> + return true;
> +
> +	return dma_resv_trylock_ctx(obj->resv, &exec->ticket);
> +}
> +EXPORT_SYMBOL(drm_exec_trylock_obj);
> +
> +/**
> + * drm_exec_keep_trylocked_obj - keep the trylocked obj
> + * @exec: the drm_exec object with the state
> + * @obj: the GEM object to trylock
> + *
> + * Keep a trylocked object in the drm_exec state object. Grabs a
> reference to
> + * the object and adds it to the container of locked objects.
> + */

So these could be dropped.


> +int drm_exec_keep_trylocked_obj(struct drm_exec *exec,
> + struct drm_gem_object *obj)
> +{
> + int ret;
> +
> + ret = drm_exec_obj_locked(exec, obj);
> + if (ret) {
> + dma_resv_unlock(obj->resv);
> + return ret;
> + }
> +
> + if (exec->prelocked == obj) {
> + drm_gem_object_put(exec->prelocked);
> + exec->prelocked = NULL;
> + }
> +
> + return ret;
> +}
> +EXPORT_SYMBOL(drm_exec_keep_trylocked_obj);
> +
> +/**
> + * drm_exec_drop_trylocked_obj - drop the trylocked obj
> + * @exec: the drm_exec object with the state
> + * @obj: the GEM object to trylock
> + *
> + * Used to drop a trylocked object in the drm_exec state object,
> drop the
> + * reservation lock again and cleanup all references.
> + */
> +void drm_exec_drop_trylocked_obj(struct drm_exec *exec,
> +  struct drm_gem_object *obj)
> +{
> + /*
> +  * We can't drop the reference of prelocked objects since we
> might still
> +  * be in atomic context. Additionally it makes sense to keep
> the
> +  * prelocked object around since we might need it again
> later on.
> +  */
> + if (exec->prelocked != obj)
> + dma_resv_unlock(obj->resv);
> +}
> +EXPORT_SYMBOL(drm_exec_drop_trylocked_obj);
> +
>  MODULE_DESCRIPTION("DRM execution context");
>  MODULE_LICENSE("Dual MIT/GPL");
> diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h
> index aa786b828a0a..a3943057a3e8 100644
> --- a/include/drm/drm_exec.h
> +++ b/include/drm/drm_exec.h
> @@ -146,5 +146,10 @@ int drm_exec_prepare_array(struct drm_exec
> *exec,
>      struct drm_gem_object **objects,
>      unsigned int num_objects,
>      unsigned int num_fences);
> +bool drm_exec_trylock_obj(struct drm_exec *exec, struct
> drm_gem_object *obj);
> +int drm_exec_keep_trylocked_obj(struct drm_exec *exec,
> + struct drm_gem_object *obj);
> +void drm_exec_drop_trylocked_obj(struct drm_exec *exec,
> + struct drm_gem_object *obj);
>  
>  #endif



Re: [PATCH 2/5] drm/exec: don't immediately add the prelocked obj

2024-07-03 Thread Thomas Hellström
On Wed, 2024-07-03 at 15:25 +0200, Christian König wrote:
> Some contended objects might never be locked again in the case of
> eviction handling, for example.
> 
> Make sure that those don't show up in the list of locked objects
> until they are explicitly mentioned.

Could you be a bit more specific in the commit message about in what
situations that is bad?

/Thomas



> 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/drm_exec.c | 18 +-
>  1 file changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
> index 2da094bdf8a4..220df336fbd9 100644
> --- a/drivers/gpu/drm/drm_exec.c
> +++ b/drivers/gpu/drm/drm_exec.c
> @@ -61,8 +61,11 @@ static void drm_exec_unlock_all(struct drm_exec
> *exec)
>   drm_gem_object_put(obj);
>   }
>  
> - drm_gem_object_put(exec->prelocked);
> - exec->prelocked = NULL;
> + if (exec->prelocked) {
> + dma_resv_unlock(exec->prelocked->resv);
> + drm_gem_object_put(exec->prelocked);
> + exec->prelocked = NULL;
> + }
>  }
>  
>  /**
> @@ -179,16 +182,9 @@ static int drm_exec_lock_contended(struct
> drm_exec *exec)
> 	 dma_resv_lock_slow(obj->resv, &exec->ticket);
>   }
>  
> - ret = drm_exec_obj_locked(exec, obj);
> - if (unlikely(ret))
> - goto error_unlock;
> -
>   exec->prelocked = obj;
>   return 0;
>  
> -error_unlock:
> - dma_resv_unlock(obj->resv);
> -
>  error_dropref:
>   drm_gem_object_put(obj);
>   return ret;
> @@ -214,6 +210,10 @@ int drm_exec_lock_obj(struct drm_exec *exec,
> struct drm_gem_object *obj)
>   return ret;
>  
>   if (exec->prelocked == obj) {
> + ret = drm_exec_obj_locked(exec, obj);
> + if (unlikely(ret))
> + return ret;
> +
>   drm_gem_object_put(exec->prelocked);
>   exec->prelocked = NULL;
>   return 0;



[PATCH v6 12/12] drm/xe: Increase the XE_PL_TT watermark

2024-07-03 Thread Thomas Hellström
The XE_PL_TT watermark was set to 50% of system memory.
The idea behind that was unclear since the net effect is that
TT memory will be evicted to TTM_PL_SYSTEM memory if that
watermark is exceeded, requiring PPGTT rebinds and dma
remapping. But there is no similar watermark for TTM_PL_SYSTEM
memory.

The TTM functionality that tries to swap out system memory to
shmem objects if a 50% limit of total system memory is reached
is orthogonal to this, and with the shrinker added, it's no
longer in effect.

Replace the 50% TTM_PL_TT limit with a 100% limit, in effect
allowing all graphics memory to be bound to the device unless it
has been swapped out by the shrinker.

Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c 
b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
index 9844a8edbfe1..d38b91872da3 100644
--- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -108,9 +108,8 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe)
u64 gtt_size;
 
	si_meminfo(&si);
+   /* Potentially restrict amount of TT memory here. */
gtt_size = (u64)si.totalram * si.mem_unit;
-   /* TTM limits allocation of all TTM devices by 50% of system memory */
-   gtt_size /= 2;
 
man->use_tt = true;
	man->func = &xe_ttm_sys_mgr_func;
-- 
2.44.0



[PATCH v6 11/12] drm/ttm, drm/xe: Add a shrinker for xe bos

2024-07-03 Thread Thomas Hellström
Rather than relying on the TTM watermark accounting add a shrinker
for xe_bos in TT or system memory.

Leverage the newly added TTM per-page shrinking and shmem backup
support.

Although xe doesn't fully support WONTNEED (purgeable) bos yet,
introduce and add shrinker support for purgeable ttm_tts.

v2:
- Cleanups bugfixes and a KUNIT shrinker test.
- Add writeback support, and activate if kswapd.
v3:
- Move the try_shrink() helper to core TTM.
- Minor cleanups.
v4:
- Add runtime pm for the shrinker. Shrinking may require an active
  device for CCS metadata copying.
v5:
- Separately purge ghost- and zombie objects in the shrinker.
- Fix a format specifier - type inconsistency. (Kernel test robot).

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c |  67 ++
 drivers/gpu/drm/xe/Makefile   |   1 +
 drivers/gpu/drm/xe/tests/xe_bo.c  | 118 +++
 drivers/gpu/drm/xe/tests/xe_bo_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_bo_test.h |   1 +
 drivers/gpu/drm/xe/xe_bo.c| 155 --
 drivers/gpu/drm/xe/xe_bo.h|  26 +++
 drivers/gpu/drm/xe/xe_device.c|   8 +
 drivers/gpu/drm/xe/xe_device_types.h  |   2 +
 drivers/gpu/drm/xe/xe_shrinker.c  | 287 ++
 drivers/gpu/drm/xe/xe_shrinker.h  |  18 ++
 include/drm/ttm/ttm_bo.h  |   3 +
 12 files changed, 671 insertions(+), 16 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_shrinker.c
 create mode 100644 drivers/gpu/drm/xe/xe_shrinker.h

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index c4f678f30fc2..563e96a4cf06 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -924,3 +924,70 @@ long ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, 
struct ttm_device *bdev,
 
return progress;
 }
+EXPORT_SYMBOL(ttm_lru_walk_for_evict);
+
+/**
+ * @walk: The struct ttm_lru_walk that describes the walk.
+ * @walk: The struct xe_ttm_lru_walk that describes the walk.
+ * @bo: The buffer object.
+ * @purge: Whether to attempt to purge the bo content since it's no
+ * longer needed.
+ * @writeback: If !@purge, attempt to write out to persistent storage.
+ *
+ * The function uses the ttm_tt_backup functionality to back up or
+ * purge a struct ttm_tt. If the bo is not in system, it's first
+ * moved there.
+ *
+ * Return: The number of pages shrunken or purged, or
+ * negative error code on failure.
+ */
+long ttm_bo_try_shrink(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo,
+  bool purge, bool writeback)
+{
+   static const struct ttm_place sys_placement_flags = {
+   .fpfn = 0,
+   .lpfn = 0,
+   .mem_type = TTM_PL_SYSTEM,
+   .flags = 0,
+   };
+   static struct ttm_placement sys_placement = {
+   .num_placement = 1,
+   .placement = &sys_placement_flags,
+   };
+   struct ttm_operation_ctx *ctx = walk->ctx;
+   struct ttm_tt *tt = bo->ttm;
+   long lret;
+
+   dma_resv_assert_held(bo->base.resv);
+
+   if (!tt || !ttm_tt_is_populated(tt))
+   return 0;
+
+   if (bo->resource->mem_type != TTM_PL_SYSTEM) {
+   int ret = ttm_bo_validate(bo, &sys_placement, ctx);
+
+   if (ret) {
+   if (ret == -EINTR || ret == -EDEADLK ||
+   ret == -ERESTARTSYS)
+   return ret;
+   return 0;
+   }
+   }
+
+   lret = ttm_bo_wait_ctx(bo, ctx);
+   if (lret < 0) {
+   if (lret == -ERESTARTSYS)
+   return lret;
+   return 0;
+   }
+
+   if (bo->deleted)
+   lret = ttm_tt_backup(bo->bdev, tt, true, writeback);
+   else
+   lret = ttm_tt_backup(bo->bdev, tt, purge, writeback);
+   if (lret < 0 && lret != -EINTR)
+   return 0;
+
+   return lret;
+}
+EXPORT_SYMBOL(ttm_bo_try_shrink);
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index b1e03bfe4a68..1eba51bdd172 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -112,6 +112,7 @@ xe-y += xe_bb.o \
xe_ring_ops.o \
xe_sa.o \
xe_sched_job.o \
+   xe_shrinker.o \
xe_step.o \
xe_sync.o \
xe_tile.o \
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 9f3c02826464..49617f16dc76 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -6,6 +6,8 @@
 #include 
 #include 
 
+#include 
+
 #include "tests/xe_bo_test.h"
 #include "tests/xe_pci_test.h"
 #include "tests/xe_test.h"
@@ -350,3 +352,119 @@ void xe_bo_evict_kunit(struct kunit *test)
xe_cal

[PATCH v6 10/12] drm/ttm: Use fault-injection to test error paths

2024-07-03 Thread Thomas Hellström
Use fault-injection to test partial TTM swapout and interrupted swapin.
Return -EINTR for swapin to test the caller's ability to handle and
restart the swapin, and on swapout perform a partial swapout to test
the swapin and release_shrunken functionality.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/Kconfig| 10 ++
 drivers/gpu/drm/ttm/ttm_pool.c | 17 -
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index fd0749c0c630..9f27271bfab8 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -272,6 +272,16 @@ config DRM_GPUVM
  GPU-VM representation providing helpers to manage a GPUs virtual
  address space
 
+config DRM_TTM_BACKUP_FAULT_INJECT
+   bool "Enable fault injection during TTM backup"
+   depends on DRM_TTM
+   default n
+   help
+ Inject recoverable failures during TTM backup and recovery of
+ backed-up objects. For DRM driver developers only.
+
+ If in doubt, choose N.
+
 config DRM_BUDDY
tristate
depends on DRM
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 38e50cf81b0a..d32a1f2e5e50 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -431,6 +431,7 @@ static int ttm_pool_restore_tt(struct ttm_pool_tt_restore 
*restore,
   struct ttm_backup *backup,
   struct ttm_operation_ctx *ctx)
 {
+   static unsigned long __maybe_unused swappedin;
unsigned int i, nr = 1 << restore->order;
int ret = 0;
 
@@ -446,6 +447,13 @@ static int ttm_pool_restore_tt(struct ttm_pool_tt_restore 
*restore,
if (handle == 0)
continue;
 
+   if (IS_ENABLED(CONFIG_DRM_TTM_BACKUP_FAULT_INJECT) &&
+   ctx->interruptible &&
+   ++swappedin % 100 == 0) {
+   ret = -EINTR;
+   break;
+   }
+
ret = backup->ops->copy_backed_up_page
(backup, restore->first_page[i],
 handle, ctx->interruptible);
@@ -892,7 +900,14 @@ long ttm_pool_backup_tt(struct ttm_pool *pool, struct 
ttm_tt *ttm, bool purge,
 
alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN | 
__GFP_RETRY_MAYFAIL;
 
-   for (i = 0; i < ttm->num_pages; ++i) {
+   num_pages = ttm->num_pages;
+
+   /* Pretend doing fault injection by shrinking only half of the pages. */
+
+   if (IS_ENABLED(CONFIG_DRM_TTM_BACKUP_FAULT_INJECT))
+   num_pages = DIV_ROUND_UP(num_pages, 2);
+
+   for (i = 0; i < num_pages; ++i) {
page = ttm->pages[i];
if (unlikely(!page))
continue;
-- 
2.44.0



[PATCH v6 09/12] drm/ttm/pool: Provide a helper to shrink pages

2024-07-03 Thread Thomas Hellström
Provide a helper to shrink ttm_tt page-vectors on a per-page
basis. A ttm_backup backend could then in theory get away with
allocating a single temporary page for each struct ttm_tt.

This is accomplished by splitting larger pages before trying to
back them up.

In the future we could allow ttm_backup to handle backing up
large pages as well, but currently there's no benefit in
doing that, since the shmem backup backend would have to
split those anyway to avoid allocating too much temporary
memory, and if the backend instead inserts pages into the
swap-cache, those are split on reclaim by the core.

Due to potential backup and recovery errors, allow partially swapped-out
struct ttm_tt's, but mark them as swapped out, stopping them from being
swapped out a second time. More details in the ttm_pool.c DOC section.

v2:
- A couple of cleanups and error fixes in ttm_pool_back_up_tt.
- s/back_up/backup/
- Add a writeback parameter to the exported interface.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_pool.c | 397 +++--
 drivers/gpu/drm/ttm/ttm_tt.c   |  37 +++
 include/drm/ttm/ttm_pool.h |   5 +
 include/drm/ttm/ttm_tt.h   |  20 ++
 4 files changed, 446 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 6e1fd6985ffc..38e50cf81b0a 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -41,6 +41,7 @@
 #include 
 #endif
 
+#include 
 #include 
 #include 
 #include 
@@ -58,6 +59,32 @@ struct ttm_pool_dma {
unsigned long vaddr;
 };
 
+/**
+ * struct ttm_pool_tt_restore - State representing restore from backup
+ * @alloced_pages: Total number of already allocated pages for the ttm_tt.
+ * @restored_pages: Number of (sub) pages restored from swap for this
+ *  chunk of 1 << @order pages.
+ * @first_page: The ttm page ptr corresponding to @old_pages[0].
+ * @caching_divide: Page pointer where subsequent pages are cached.
+ * @old_pages: Backup copy of page pointers that were replaced by the new
+ *page allocation.
+ * @pool: The pool used for page allocation while restoring.
+ * @order: The order of the last page allocated while restoring.
+ *
+ * Recovery from backup might fail when we've recovered less than the
+ * full ttm_tt. In order not to lose any data (yet), keep information
+ * around that allows us to restart a failed ttm backup recovery.
+ */
+struct ttm_pool_tt_restore {
+   pgoff_t alloced_pages;
+   pgoff_t restored_pages;
+   struct page **first_page;
+   struct page **caching_divide;
+   struct ttm_pool *pool;
+   unsigned int order;
+   struct page *old_pages[];
+};
+
 static unsigned long page_pool_size;
 
 MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
@@ -354,11 +381,102 @@ static unsigned int ttm_pool_page_order(struct ttm_pool 
*pool, struct page *p)
return p->private;
 }
 
+/*
+ * To be able to insert single pages into backup directly,
+ * we need to split multi-order page allocations and make them look
+ * like single-page allocations.
+ */
+static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct page *p)
+{
+   unsigned int order = ttm_pool_page_order(pool, p);
+   pgoff_t nr;
+
+   if (!order)
+   return;
+
+   split_page(p, order);
+   nr = 1UL << order;
+   while (nr--)
+   (p++)->private = 0;
+}
+
+/**
+ * DOC: Partial backup and restoration of a struct ttm_tt.
+ *
+ * Swapout using ttm_backup::ops::backup_page() and swapin using
+ * ttm_backup::ops::copy_backed_up_page() may fail.
+ * The former most likely due to lack of swap-space or memory, the latter due
+ * to lack of memory or because of signal interruption during waits.
+ *
+ * Backup failure is easily handled by using a ttm_tt pages vector that holds
+ * both swap entries and page pointers. This has to be taken into account when
+ * restoring such a ttm_tt from backup, and when freeing it while backed up.
+ * When restoring, for simplicity, new pages are actually allocated from the
+ * pool and the contents of any old pages are copied in and then the old pages
+ * are released.
+ *
+ * For restoration failures, the struct ttm_pool_tt_restore holds sufficient state
+ * to be able to resume an interrupted restore, and that structure is freed once
+ * the restoration is complete. If the struct ttm_tt is destroyed while there
+ * is a valid struct ttm_pool_tt_restore attached, that is also properly taken
+ * care of.
+ */
+
+static bool ttm_pool_restore_valid(const struct ttm_pool_tt_restore *restore)
+{
+   return restore && restore->restored_pages < (1 << restore->order);
+}
+
+static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
+  struct ttm_ba

[PATCH v6 08/12] drm/ttm: Add a virtual base class for graphics memory backup

2024-07-03 Thread Thomas Hellström
Initially intended for experimenting with different backup
solutions (shmem vs direct swap cache insertion), abstract
the backup destination using a virtual base class.

Also provide a sample implementation for shmem.

While one could perhaps skip the abstraction once a preferred
backup solution is settled on, this functionality may actually
come in handy for configurable dedicated graphics memory
backup to fast nvme files or similar, without affecting
swap-space. It could indeed be useful for VRAM backup on S4 and
other cases.

v5:
- Fix a UAF. (kernel test robot, Dan Carptenter)
v6:
- Rename ttm_backup_shmem_copy_page() function argument
  (Matthew Brost)
- Add some missing documentation

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/Makefile   |   2 +-
 drivers/gpu/drm/ttm/ttm_backup_shmem.c | 139 +
 include/drm/ttm/ttm_backup.h   | 137 
 3 files changed, 277 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/ttm/ttm_backup_shmem.c
 create mode 100644 include/drm/ttm/ttm_backup.h

diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index dad298127226..5e980dd90e41 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -4,7 +4,7 @@
 
 ttm-y := ttm_tt.o ttm_bo.o ttm_bo_util.o ttm_bo_vm.o ttm_module.o \
ttm_execbuf_util.o ttm_range_manager.o ttm_resource.o ttm_pool.o \
-   ttm_device.o ttm_sys_manager.o
+   ttm_device.o ttm_sys_manager.o ttm_backup_shmem.o
 ttm-$(CONFIG_AGP) += ttm_agp_backend.o
 
 obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_backup_shmem.c 
b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
new file mode 100644
index ..3d23a34d9f34
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include 
+#include 
+
+/**
+ * struct ttm_backup_shmem - A shmem based ttm_backup subclass.
+ * @backup: The base struct ttm_backup
+ * @filp: The associated shmem object
+ */
+struct ttm_backup_shmem {
+   struct ttm_backup backup;
+   struct file *filp;
+};
+
+static struct ttm_backup_shmem *to_backup_shmem(struct ttm_backup *backup)
+{
+   return container_of(backup, struct ttm_backup_shmem, backup);
+}
+
+static void ttm_backup_shmem_drop(struct ttm_backup *backup, unsigned long 
handle)
+{
+   handle -= 1;
+   shmem_truncate_range(file_inode(to_backup_shmem(backup)->filp), handle,
+handle + 1);
+}
+
+static int ttm_backup_shmem_copy_page(struct ttm_backup *backup, struct page 
*dst,
+ unsigned long handle, bool intr)
+{
+   struct file *filp = to_backup_shmem(backup)->filp;
+   struct address_space *mapping = filp->f_mapping;
+   struct folio *from_folio;
+
+   handle -= 1;
+   from_folio = shmem_read_folio(mapping, handle);
+   if (IS_ERR(from_folio))
+   return PTR_ERR(from_folio);
+
+   /* Note: Use drm_memcpy_from_wc? */
+   copy_highpage(dst, folio_file_page(from_folio, handle));
+   folio_put(from_folio);
+
+   return 0;
+}
+
+static unsigned long
+ttm_backup_shmem_backup_page(struct ttm_backup *backup, struct page *page,
+bool writeback, pgoff_t i, gfp_t page_gfp,
+gfp_t alloc_gfp)
+{
+   struct file *filp = to_backup_shmem(backup)->filp;
+   struct address_space *mapping = filp->f_mapping;
+   unsigned long handle = 0;
+   struct folio *to_folio;
+   int ret;
+
+   to_folio = shmem_read_folio_gfp(mapping, i, alloc_gfp);
+   if (IS_ERR(to_folio))
+   return handle;
+
+   folio_mark_accessed(to_folio);
+   folio_lock(to_folio);
+   folio_mark_dirty(to_folio);
+   copy_highpage(folio_file_page(to_folio, i), page);
+   handle = i + 1;
+
+   if (writeback && !folio_mapped(to_folio) && 
folio_clear_dirty_for_io(to_folio)) {
+   struct writeback_control wbc = {
+   .sync_mode = WB_SYNC_NONE,
+   .nr_to_write = SWAP_CLUSTER_MAX,
+   .range_start = 0,
+   .range_end = LLONG_MAX,
+   .for_reclaim = 1,
+   };
+   folio_set_reclaim(to_folio);
+   ret = mapping->a_ops->writepage(folio_page(to_folio, 0), &wbc);
+   if (!folio_test_writeback(to_folio))
+   folio_clear_reclaim(to_folio);
+   /* If writepage succeeds, it unlocks the folio */
+   if (ret)
+   folio_unlock(to_folio);
+   } else {
+   folio_unlock(to_folio);
+   }
+
+   folio_put(to_folio);
+
+   return handle;
+}
+
+static void ttm_backup_shmem_fini(struct 

[PATCH v6 07/12] drm/ttm: Use the LRU walker for eviction

2024-07-03 Thread Thomas Hellström
Use the LRU walker for eviction. This helps
remove a lot of code with weird locking
semantics.

The functionality is slightly changed so that
when trylocked buffer objects are exhausted, we
continue to interleave walks with ticket-locks while
progress is still being made. The list walks are
not restarted in-between evictions.

Also provide a separate ttm_bo_evict_first()
function for its single user. The context of that
user allows sleeping dma_resv locks.

v6:
- Various cleanups suggested by Matthew Brost.
- Fix error return code of ttm_bo_evict_first(). (Matthew Brost)
- Fix an error check that was inverted. (Matthew Brost)

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo.c   | 346 -
 drivers/gpu/drm/ttm/ttm_resource.c |  21 +-
 include/drm/ttm/ttm_bo.h   |   8 +-
 3 files changed, 144 insertions(+), 231 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1053cdca131e..603b9353f436 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -224,80 +224,6 @@ static void ttm_bo_flush_all_fences(struct 
ttm_buffer_object *bo)
	dma_resv_iter_end(&cursor);
 }
 
-/**
- * ttm_bo_cleanup_refs
- * If bo idle, remove from lru lists, and unref.
- * If not idle, block if possible.
- *
- * Must be called with lru_lock and reservation held, this function
- * will drop the lru lock and optionally the reservation lock before returning.
- *
- * @bo:The buffer object to clean-up
- * @interruptible: Any sleeps should occur interruptibly.
- * @no_wait_gpu:   Never wait for gpu. Return -EBUSY instead.
- * @unlock_resv:   Unlock the reservation lock as well.
- */
-
-static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
-  bool interruptible, bool no_wait_gpu,
-  bool unlock_resv)
-{
-   struct dma_resv *resv = &bo->base._resv;
-   int ret;
-
-   if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP))
-   ret = 0;
-   else
-   ret = -EBUSY;
-
-   if (ret && !no_wait_gpu) {
-   long lret;
-
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-   spin_unlock(&bo->bdev->lru_lock);
-
-   lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
-interruptible,
-30 * HZ);
-
-   if (lret < 0)
-   return lret;
-   else if (lret == 0)
-   return -EBUSY;
-
-   spin_lock(&bo->bdev->lru_lock);
-   if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
-   /*
-* We raced, and lost, someone else holds the 
reservation now,
-* and is probably busy in ttm_bo_cleanup_memtype_use.
-*
-* Even if it's not the case, because we finished 
waiting any
-* delayed destruction would succeed, so just return 
success
-* here.
-*/
-   spin_unlock(&bo->bdev->lru_lock);
-   return 0;
-   }
-   ret = 0;
-   }
-
-   if (ret) {
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-   spin_unlock(&bo->bdev->lru_lock);
-   return ret;
-   }
-
-   spin_unlock(&bo->bdev->lru_lock);
-   ttm_bo_cleanup_memtype_use(bo);
-
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-
-   return 0;
-}
-
 /*
  * Block for the dma_resv object to become idle, lock the buffer and clean up
  * the resource and tt object.
@@ -505,151 +431,153 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object 
*bo,
 }
 EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
-/*
- * Check the target bo is allowable to be evicted or swapout, including cases:
- *
- * a. if share same reservation object with ctx->resv, have assumption
- * reservation objects should already be locked, so not lock again and
- * return true directly when either the opreation allow_reserved_eviction
- * or the target bo already is in delayed free list;
+/**
+ * ttm_bo_evict_first() - Evict the first bo on the manager's LRU list.
+ * @bdev: The ttm device.
+ * @man: The manager whose bo to evict.
+ * @ctx: The TTM operation ctx governing the eviction.
  *
- * b. Otherwise, trylock it.
+ * Return: 0 if successful or the resource disappeared. Negative error code on error.
  */
-static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-  struct ttm_operation_ctx *ctx,
-

[PATCH v6 06/12] drm/ttm: Use the LRU walker helper for swapping

2024-07-03 Thread Thomas Hellström
Rework the TTM swapping to use the LRU walker helper.
This helps fix up the ttm_bo_swapout() interface
to be consistent about not requiring any locking.

For now mimic the current behaviour of using trylock
only. We could be using ticket-locks here but defer
that until it's deemed necessary. The TTM swapout
functionality is a bit weird anyway since it
alternates between memory types without exhausting
TTM_PL_SYSTEM first.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 

v6:
- Improve on error code translation in the swapout callback
  (Matthew Brost).

Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 111 ---
 drivers/gpu/drm/ttm/ttm_device.c |  30 ++---
 include/drm/ttm/ttm_bo.h |   5 +-
 3 files changed, 82 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 43eda720657f..1053cdca131e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1118,11 +1118,23 @@ int ttm_bo_wait_ctx(struct ttm_buffer_object *bo, 
struct ttm_operation_ctx *ctx)
 }
 EXPORT_SYMBOL(ttm_bo_wait_ctx);
 
-int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
-  gfp_t gfp_flags)
+/**
+ * struct ttm_bo_swapout_walk - Parameters for the swapout walk
+ */
+struct ttm_bo_swapout_walk {
+   /** @walk: The walk base parameters. */
+   struct ttm_lru_walk walk;
+   /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */
+   gfp_t gfp_flags;
+};
+
+static long
+ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
 {
-   struct ttm_place place;
-   bool locked;
+   struct ttm_place place = {.mem_type = bo->resource->mem_type};
+   struct ttm_bo_swapout_walk *swapout_walk =
+   container_of(walk, typeof(*swapout_walk), walk);
+   struct ttm_operation_ctx *ctx = walk->ctx;
long ret;
 
/*
@@ -1131,28 +1143,29 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct 
ttm_operation_ctx *ctx,
 * The driver may use the fact that we're moving from SYSTEM
 * as an indication that we're about to swap out.
 */
-   memset(&place, 0, sizeof(place));
-   place.mem_type = bo->resource->mem_type;
-   if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL))
-   return -EBUSY;
+   if (!bo->bdev->funcs->eviction_valuable(bo, &place)) {
+   ret = -EBUSY;
+   goto out;
+   }
 
if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) ||
bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL ||
-   bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED ||
-   !ttm_bo_get_unless_zero(bo)) {
-   if (locked)
-   dma_resv_unlock(bo->base.resv);
-   return -EBUSY;
+   bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED) {
+   ret = -EBUSY;
+   goto out;
}
 
if (bo->deleted) {
-   ret = ttm_bo_cleanup_refs(bo, false, false, locked);
-   ttm_bo_put(bo);
-   return ret == -EBUSY ? -ENOSPC : ret;
-   }
+   pgoff_t num_pages = bo->ttm->num_pages;
 
-   /* TODO: Cleanup the locking */
-   spin_unlock(&bo->bdev->lru_lock);
+   ret = ttm_bo_wait_ctx(bo, ctx);
+   if (ret)
+   goto out;
+
+   ttm_bo_cleanup_memtype_use(bo);
+   ret = num_pages;
+   goto out;
+   }
 
/*
 * Move to system cached
@@ -1164,12 +1177,13 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct 
ttm_operation_ctx *ctx,
	memset(&hop, 0, sizeof(hop));
place.mem_type = TTM_PL_SYSTEM;
	ret = ttm_resource_alloc(bo, &place, &evict_mem);
-   if (unlikely(ret))
+   if (ret)
goto out;
 
	ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop);
-   if (unlikely(ret != 0)) {
-   WARN(ret == -EMULTIHOP, "Unexpected multihop in swaput 
- likely driver bug.\n");
+   if (ret) {
+   WARN(ret == -EMULTIHOP,
+"Unexpected multihop in swapout - likely driver 
bug.\n");
	ttm_resource_free(bo, &evict_mem);
goto out;
}
@@ -1179,30 +1193,53 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct 
ttm_operation_ctx *ctx,
 * Make sure BO is idle.
 */
ret = ttm_bo_wait_ctx(bo, ctx);
-   if (unlikely(ret != 0))
+   if (ret)
goto out;
 
ttm_bo_unmap_virtual(bo);
-
-   /*
-* Swap out. Buffer will be swapped in again as soon as
-* anyone tries to access a ttm page.
-*/
if (bo->bdev->funcs-&

[PATCH v6 05/12] drm/ttm: Provide a generic LRU walker helper

2024-07-03 Thread Thomas Hellström
Provide a generic LRU walker in TTM, in the spirit of drm_gem_lru_scan()
but building on the restartable TTM LRU functionality.

The LRU walker optionally supports locking objects as part of
a ww mutex locking transaction, to mimic to some extent the
current functionality in ttm. However any -EDEADLK return
is converted to -ENOSPC and then to -ENOMEM before reaching
the driver, so that the driver will need to back off and possibly retry
without being able to keep the ticket.

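As a rough usage sketch (the ops indirection and the shrink helper are
taken from later patches in this series; treat names and fields as
illustrative rather than final):

	static long example_process_bo(struct ttm_lru_walk *walk,
				       struct ttm_buffer_object *bo)
	{
		/* Called with @bo locked and referenced; return pages of
		 * progress or a negative error code. */
		return ttm_bo_try_shrink(walk, bo, false, false);
	}

	static const struct ttm_lru_walk_ops example_ops = {
		.process_bo = example_process_bo,
	};

	struct ttm_lru_walk walk = {
		.ops = &example_ops,
		.ctx = &ctx,	/* struct ttm_operation_ctx */
		.ticket = NULL,	/* or a ww_acquire_ctx to allow one ticketlock */
	};

	long progress = ttm_lru_walk_for_evict(&walk, bdev, man, target);
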
v3:
- Move the helper to core ttm.
- Remove the drm_exec usage from it for now, it will be
  reintroduced later in the series.
v4:
- Handle the -EALREADY case if ticketlocking.
v6:
- Some cleanup and added code comments (Matthew Brost)
- Clarified the ticketlock in the commit message (Matthew Brost)

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 156 ++
 include/drm/ttm/ttm_bo.h  |  35 +++
 2 files changed, 191 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 0b3f4267130c..c4f678f30fc2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -768,3 +768,159 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
ttm_tt_destroy(bo->bdev, ttm);
return ret;
 }
+
+static bool ttm_lru_walk_trylock(struct ttm_lru_walk *walk,
+struct ttm_buffer_object *bo,
+bool *needs_unlock)
+{
+   struct ttm_operation_ctx *ctx = walk->ctx;
+
+   *needs_unlock = false;
+
+   if (dma_resv_trylock(bo->base.resv)) {
+   *needs_unlock = true;
+   return true;
+   }
+
+   if (bo->base.resv == ctx->resv && ctx->allow_res_evict) {
+   dma_resv_assert_held(bo->base.resv);
+   return true;
+   }
+
+   return false;
+}
+
+static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
+  struct ttm_buffer_object *bo,
+  bool *needs_unlock)
+{
+   struct dma_resv *resv = bo->base.resv;
+   int ret;
+
+   if (walk->ctx->interruptible)
+   ret = dma_resv_lock_interruptible(resv, walk->ticket);
+   else
+   ret = dma_resv_lock(resv, walk->ticket);
+
+   if (!ret) {
+   *needs_unlock = true;
+   /*
+* Only a single ticketlock per loop. Ticketlocks are prone
+* to return -EDEADLK causing the eviction to fail, so
+* after waiting for the ticketlock, revert back to
+* trylocking for this walk.
+*/
+   walk->ticket = NULL;
+   } else if (ret == -EDEADLK) {
+   /* Caller needs to exit the ww transaction. */
+   ret = -ENOSPC;
+   }
+
+   return ret;
+}
+
+static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
+{
+   if (locked)
+   dma_resv_unlock(bo->base.resv);
+}
+
+/**
+ * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
+ * valid items.
+ * @walk: describe the walks and actions taken
+ * @bdev: The TTM device.
+ * @man: The struct ttm_resource manager whose LRU lists we're walking.
+ * @target: The end condition for the walk.
+ *
+ * The LRU lists of @man are walked, and for each struct ttm_resource encountered,
+ * the corresponding ttm_buffer_object is locked and taken a reference on, and
+ * the LRU lock is dropped. The LRU lock may be dropped before locking and, in
+ * that case, it's verified that the item actually remains on the LRU list after
+ * the lock, and that the buffer object didn't switch resource in between.
+ *
+ * With a locked object, the actions indicated by @walk->process_bo are
+ * performed, and after that, the bo is unlocked, the refcount dropped and the
+ * next struct ttm_resource is processed. Here, the walker relies on
+ * TTM's restartable LRU list implementation.
+ *
+ * Typically @walk->process_bo() would return the number of pages evicted,
+ * swapped or shrunken, so that when the total exceeds @target, or when the
+ * LRU list has been walked in full, iteration is terminated. It's also terminated
+ * on error. Note that the definition of @target is done by the caller; it
+ * could have a different meaning than the number of pages.
+ *
+ * Note that the way dma_resv individualization is done, locking needs to be done
+ * either with the LRU lock held (trylocking only) or with a reference on the
+ * object.
+ *
+ * Return: The progress made towards target or negative error code on error.
+ */
+long ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
+   struct ttm_resource_manager *man, long target)
+{
+

[PATCH v6 04/12] drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves

2024-07-03 Thread Thomas Hellström
To address the problem with hitches moving when bulk move
sublists are lru-bumped, register the list cursors with the
ttm_lru_bulk_move structure when traversing its list, and
when lru-bumping the list, move the cursor hitch to the tail.
This also means it's mandatory for drivers to call
ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when
initializing and finalizing the bulk move structure, so add
those calls to the amdgpu- and xe driver.

Compared to v1 this is slightly more code but less fragile
and hopefully easier to understand.

Changes in previous series:
- Completely rework the functionality
- Avoid a NULL pointer dereference assigning manager->mem_type
- Remove some leftover code causing build problems
v2:
- For hitch bulk tail moves, store the mem_type in the cursor
  instead of with the manager.
v3:
- Remove leftover mem_type member from change in v2.
v6:
- Add some lockdep asserts (Matthew Brost)
- Avoid NULL pointer dereference (Matthew Brost)
- No need to check bo->resource before dereferencing
  bo->bulk_move (Matthew Brost)

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  4 ++
 drivers/gpu/drm/ttm/ttm_resource.c | 92 ++
 drivers/gpu/drm/xe/xe_vm.c |  4 ++
 include/drm/ttm/ttm_resource.h | 56 ++--
 4 files changed, 135 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3abfa66d72a2..97743993d711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2420,6 +2420,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
if (r)
return r;
 
+   ttm_lru_bulk_move_init(&vm->lru_bulk_move);
+
vm->is_compute_context = false;
 
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@@ -2484,6 +2486,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
 error_free_delayed:
dma_fence_put(vm->last_tlb_flush);
dma_fence_put(vm->last_unlocked);
+   ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
amdgpu_vm_fini_entities(vm);
 
return r;
@@ -2640,6 +2643,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
}
}
 
+   ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
 }
 
 /**
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 9c8b6499edfb..b6a2daac5518 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -33,6 +33,53 @@
 
 #include 
 
+/* Detach the cursor from the bulk move list*/
+static void
+ttm_resource_cursor_clear_bulk(struct ttm_resource_cursor *cursor)
+{
+   lockdep_assert_held(&cursor->man->bdev->lru_lock);
+
+   cursor->bulk = NULL;
+   list_del_init(&cursor->bulk_link);
+}
+
+/* Move the cursor to the end of the bulk move list it's in */
+static void ttm_resource_cursor_move_bulk_tail(struct ttm_lru_bulk_move *bulk,
+  struct ttm_resource_cursor 
*cursor)
+{
+   struct ttm_lru_bulk_move_pos *pos;
+
+   lockdep_assert_held(&cursor->man->bdev->lru_lock);
+
+   if (WARN_ON_ONCE(bulk != cursor->bulk)) {
+   list_del_init(&cursor->bulk_link);
+   return;
+   }
+
+   pos = &bulk->pos[cursor->mem_type][cursor->priority];
+   if (pos->last)
+   list_move(&cursor->hitch.link, &pos->last->lru.link);
+   ttm_resource_cursor_clear_bulk(cursor);
+}
+
+/* Move all cursors attached to a bulk move to its end */
+static void ttm_bulk_move_adjust_cursors(struct ttm_lru_bulk_move *bulk)
+{
+   struct ttm_resource_cursor *cursor, *next;
+
+   list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link)
+   ttm_resource_cursor_move_bulk_tail(bulk, cursor);
+}
+
+/* Remove a cursor from an empty bulk move list */
+static void ttm_bulk_move_drop_cursors(struct ttm_lru_bulk_move *bulk)
+{
+   struct ttm_resource_cursor *cursor, *next;
+
+   list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link)
+   ttm_resource_cursor_clear_bulk(cursor);
+}
+
 /**
  * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage
  * @cursor: The struct ttm_resource_cursor to finalize.
@@ -45,6 +92,7 @@ void ttm_resource_cursor_fini_locked(struct 
ttm_resource_cursor *cursor)
 {
	lockdep_assert_held(&cursor->man->bdev->lru_lock);
	list_del_init(&cursor->hitch.link);
+   ttm_resource_cursor_clear_bulk(cursor);
 }
 
 /**
@@ -73,9 +121,27 @@ void ttm_resource_cursor_fini(struct ttm_resource_cursor 
*cursor)
 void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk)
 {
memset(bulk, 0, sizeof(*bulk));
+   INIT_LIST_HEAD(&bulk->cursor_list);
 }
 

[PATCH v6 03/12] drm/ttm: Use LRU hitches

2024-07-03 Thread Thomas Hellström
Have iterators insert themselves into the list they are iterating
over using hitch list nodes. Since only the iterator owner
can remove these list nodes from the list, it's safe to unlock
the list and when continuing, use them as a starting point. Due to
the way LRU bumping works in TTM, newly added items will not be
missed, and bumped items will be iterated over a second time before
reaching the end of the list.

The exception is list with bulk move sublists. When bumping a
sublist, a hitch that is part of that sublist will also be moved
and we might miss items if restarting from it. This will be
addressed in a later patch.
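
In other words, an iteration that is safe across LRU lock drops now
looks like this (sketch, using the helpers added by this patch):

	struct ttm_resource_cursor cursor;
	struct ttm_resource *res;

	spin_lock(&bdev->lru_lock);
	ttm_resource_manager_for_each_res(man, &cursor, res) {
		/*
		 * The hitch in @cursor holds our position, so the LRU
		 * lock may be dropped and retaken here without losing
		 * the place or missing newly added items.
		 */
	}
	ttm_resource_cursor_fini_locked(&cursor);
	spin_unlock(&bdev->lru_lock);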

Changes in previous series:
- Updated ttm_resource_cursor_fini() documentation.
v2:
- Don't reorder ttm_resource_manager_first() and _next().
  (Christian König).
- Use list_add instead of list_move
  (Christian König)
v3:
- Split into two patches, one cleanup, one new functionality
  (Christian König)
- use ttm_resource_cursor_fini_locked() instead of open-coding
  (Matthew Brost)

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
---
 drivers/gpu/drm/ttm/ttm_bo.c   |  1 +
 drivers/gpu/drm/ttm/ttm_device.c   |  9 +++--
 drivers/gpu/drm/ttm/ttm_resource.c | 56 +-
 include/drm/ttm/ttm_resource.h |  9 +++--
 4 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6396dece0db1..43eda720657f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -621,6 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
if (locked)
dma_resv_unlock(res->bo->base.resv);
}
+   ttm_resource_cursor_fini_locked(&cursor);
 
if (!bo) {
if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 09411978a13a..f9e9b1ec8c8a 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -170,12 +170,17 @@ int ttm_device_swapout(struct ttm_device *bdev, struct 
ttm_operation_ctx *ctx,
num_pages = PFN_UP(bo->base.size);
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
-   if (!ret)
+   if (!ret) {
+   ttm_resource_cursor_fini(&cursor);
return num_pages;
-   if (ret != -EBUSY)
+   }
+   if (ret != -EBUSY) {
+   ttm_resource_cursor_fini(&cursor);
return ret;
+   }
}
}
+   ttm_resource_cursor_fini_locked(&cursor);
	spin_unlock(&bdev->lru_lock);
return 0;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 8bfbc0e8..9c8b6499edfb 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -33,6 +33,37 @@
 
 #include 
 
+/**
+ * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage
+ * @cursor: The struct ttm_resource_cursor to finalize.
+ *
+ * The function pulls the LRU list cursor off any lists it was previously
+ * attached to. Needs to be called with the LRU lock held. The function
+ * can be called multiple times after each other.
+ */
+void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor)
+{
+   lockdep_assert_held(&cursor->man->bdev->lru_lock);
+   list_del_init(&cursor->hitch.link);
+}
+
+/**
+ * ttm_resource_cursor_fini() - Finalize the LRU list cursor usage
+ * @cursor: The struct ttm_resource_cursor to finalize.
+ *
+ * The function pulls the LRU list cursor off any lists it was previously
+ * attached to. Needs to be called without the LRU list lock held. The
+ * function can be called multiple times after each other.
+ */
+void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor)
+{
+   spinlock_t *lru_lock = &cursor->man->bdev->lru_lock;
+
+   spin_lock(lru_lock);
+   ttm_resource_cursor_fini_locked(cursor);
+   spin_unlock(lru_lock);
+}
+
 /**
  * ttm_lru_bulk_move_init - initialize a bulk move structure
  * @bulk: the structure to init
@@ -485,12 +516,15 @@ void ttm_resource_manager_debug(struct 
ttm_resource_manager *man,
 EXPORT_SYMBOL(ttm_resource_manager_debug);
 
 /**
- * ttm_resource_manager_first
- *
+ * ttm_resource_manager_first() - Start iterating over the resources
+ * of a resource manager
  * @man: resource manager to iterate over
  * @cursor: cursor to record the position
  *
- * Returns the first resource from the resource manager.
+ * Initializes the cursor and starts iterating. When done iterating,
+ * the caller must explicitly call ttm_resource_cursor_fini().
+ *
+ * Return: The

[PATCH v6 02/12] drm/ttm: Slightly clean up LRU list iteration

2024-07-03 Thread Thomas Hellström
To make the transition to using lru hitches easier,
simplify the ttm_resource_manager_next() interface to only take
the cursor and reuse ttm_resource_manager_next() functionality
from ttm_resource_manager_first().

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_resource.c | 48 +-
 include/drm/ttm/ttm_resource.h | 10 ---
 2 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index db9a7a3717c4..8bfbc0e8 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -496,50 +496,44 @@ struct ttm_resource *
 ttm_resource_manager_first(struct ttm_resource_manager *man,
   struct ttm_resource_cursor *cursor)
 {
-   struct ttm_lru_item *lru;
-
	lockdep_assert_held(&man->bdev->lru_lock);
 
-   for (cursor->priority = 0; cursor->priority < TTM_MAX_BO_PRIORITY;
-++cursor->priority)
-   list_for_each_entry(lru, >lru[cursor->priority], link) {
-   if (ttm_lru_item_is_res(lru))
-   return ttm_lru_item_to_res(lru);
-   }
-
-   return NULL;
+   cursor->priority = 0;
+   cursor->man = man;
+   cursor->cur = &man->lru[cursor->priority];
+   return ttm_resource_manager_next(cursor);
 }
 
 /**
  * ttm_resource_manager_next
  *
- * @man: resource manager to iterate over
  * @cursor: cursor to record the position
- * @res: the current resource pointer
  *
- * Returns the next resource from the resource manager.
+ * Return: the next resource from the resource manager.
  */
 struct ttm_resource *
-ttm_resource_manager_next(struct ttm_resource_manager *man,
- struct ttm_resource_cursor *cursor,
- struct ttm_resource *res)
+ttm_resource_manager_next(struct ttm_resource_cursor *cursor)
 {
-   struct ttm_lru_item *lru = &res->lru;
+   struct ttm_resource_manager *man = cursor->man;
+   struct ttm_lru_item *lru;
 
	lockdep_assert_held(&man->bdev->lru_lock);
 
-   list_for_each_entry_continue(lru, &man->lru[cursor->priority], link) {
-   if (ttm_lru_item_is_res(lru))
-   return ttm_lru_item_to_res(lru);
-   }
-
-   for (++cursor->priority; cursor->priority < TTM_MAX_BO_PRIORITY;
-++cursor->priority)
-   list_for_each_entry(lru, &man->lru[cursor->priority], link) {
-   if (ttm_lru_item_is_res(lru))
-   ttm_lru_item_to_res(lru);
+   for (;;) {
+   lru = list_entry(cursor->cur, typeof(*lru), link);
+   list_for_each_entry_continue(lru, &man->lru[cursor->priority], link) {
+   if (ttm_lru_item_is_res(lru)) {
+   cursor->cur = &lru->link;
+   return ttm_lru_item_to_res(lru);
+   }
}
 
+   if (++cursor->priority >= TTM_MAX_BO_PRIORITY)
+   break;
+
+   cursor->cur = &man->lru[cursor->priority];
+   }
+
return NULL;
 }
 
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 1511d91e290d..7d81fd5b5b83 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -272,11 +272,15 @@ ttm_lru_item_to_res(struct ttm_lru_item *item)
 /**
  * struct ttm_resource_cursor
  *
+ * @man: The resource manager currently being iterated over.
+ * @cur: The list head the cursor currently points to.
  * @priority: the current priority
  *
  * Cursor to iterate over the resources in a manager.
  */
 struct ttm_resource_cursor {
+   struct ttm_resource_manager *man;
+   struct list_head *cur;
unsigned int priority;
 };
 
@@ -438,9 +442,7 @@ struct ttm_resource *
 ttm_resource_manager_first(struct ttm_resource_manager *man,
   struct ttm_resource_cursor *cursor);
 struct ttm_resource *
-ttm_resource_manager_next(struct ttm_resource_manager *man,
- struct ttm_resource_cursor *cursor,
- struct ttm_resource *res);
+ttm_resource_manager_next(struct ttm_resource_cursor *cursor);
 
 struct ttm_resource *
 ttm_lru_first_res_or_null(struct list_head *head);
@@ -455,7 +457,7 @@ ttm_lru_first_res_or_null(struct list_head *head);
  */
 #define ttm_resource_manager_for_each_res(man, cursor, res)		\
	for (res = ttm_resource_manager_first(man, cursor); res;	\
-	     res = ttm_resource_manager_next(man, cursor, res))
+	     res = ttm_resource_manager_next(cursor))
 
 struct ttm_kmap_iter *
 ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io,
-- 
2.44.0
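
For reference, a minimal usage sketch of the iteration interface after this
patch (my example, not part of the patch; as the lockdep asserts above show,
the LRU lock must be held across the walk):

	struct ttm_resource_cursor cursor;
	struct ttm_resource *res;

	spin_lock(&man->bdev->lru_lock);
	ttm_resource_manager_for_each_res(man, &cursor, res) {
		/* Inspect res; the cursor records the list position. */
	}
	spin_unlock(&man->bdev->lru_lock);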



[PATCH v6 01/12] drm/ttm: Allow TTM LRU list nodes of different types

2024-07-03 Thread Thomas Hellström
To be able to handle list unlocking while traversing the LRU
list, we want the iterators not only to point to the next
position of the list traversal, but to insert themselves as
list nodes at that point to work around the fact that the
next node might otherwise disappear from the list while
the iterator is pointing to it.

These list nodes need to be easily distinguishable from other
list nodes so that others traversing the list can skip
over them.

So declare a struct ttm_lru_item, with a struct list_head member
and a type enum. This will slightly increase the size of a
struct ttm_resource.
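
For reference, the node declared here looks essentially as follows (the
definition as added by this patch; the hunk introducing it is truncated in
this archive). The hitch type is what the follow-up patches insert into the
list while iterating:

enum ttm_lru_item_type {
	/** @TTM_LRU_RESOURCE: The resource subclass of struct ttm_lru_item */
	TTM_LRU_RESOURCE,
	/** @TTM_LRU_HITCH: The iterator hitch subclass of struct ttm_lru_item */
	TTM_LRU_HITCH
};

struct ttm_lru_item {
	struct list_head link;
	enum ttm_lru_item_type type;
};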

Changes in previous series:
- Update enum ttm_lru_item_type documentation.
v3:
- Introduce ttm_lru_first_res_or_null()
  (Christian König, Thomas Hellström)
v5:
- Update also the TTM test code (Xe CI).

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/tests/ttm_bo_test.c   |  6 +-
 drivers/gpu/drm/ttm/tests/ttm_resource_test.c |  2 +-
 drivers/gpu/drm/ttm/ttm_device.c  |  4 +-
 drivers/gpu/drm/ttm/ttm_resource.c| 89 +++
 include/drm/ttm/ttm_resource.h| 54 ++-
 5 files changed, 129 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c 
b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
index d1b32303d051..f0a7eb62116c 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
@@ -271,7 +271,7 @@ static void ttm_bo_unreserve_basic(struct kunit *test)
 
	man = ttm_manager_type(priv->ttm_dev, mem_type);
	KUNIT_ASSERT_EQ(test,
-			list_is_last(&res1->lru, &man->lru[bo->priority]), 1);
+			list_is_last(&res1->lru.link, &man->lru[bo->priority]), 1);
 
	ttm_resource_free(bo, &res1);
	ttm_resource_free(bo, &res2);
@@ -308,11 +308,11 @@ static void ttm_bo_unreserve_pinned(struct kunit *test)
	err = ttm_resource_alloc(bo, place, &res2);
	KUNIT_ASSERT_EQ(test, err, 0);
	KUNIT_ASSERT_EQ(test,
-			list_is_last(&res2->lru, &priv->ttm_dev->pinned), 1);
+			list_is_last(&res2->lru.link, &priv->ttm_dev->pinned), 1);
 
	ttm_bo_unreserve(bo);
	KUNIT_ASSERT_EQ(test,
-			list_is_last(&res2->lru, &priv->ttm_dev->pinned), 1);
+			list_is_last(&res2->lru.link, &priv->ttm_dev->pinned), 1);
 
	ttm_resource_free(bo, &res1);
	ttm_resource_free(bo, &res2);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c 
b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
index 9c2f13e53162..22260e7aea58 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
@@ -198,7 +198,7 @@ static void ttm_resource_fini_basic(struct kunit *test)
ttm_resource_init(bo, place, res);
ttm_resource_fini(man, res);
 
-	KUNIT_ASSERT_TRUE(test, list_empty(&res->lru));
+	KUNIT_ASSERT_TRUE(test, list_empty(&res->lru.link));
KUNIT_ASSERT_EQ(test, man->usage, 0);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 434cf0258000..09411978a13a 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -274,14 +274,14 @@ static void ttm_device_clear_lru_dma_mappings(struct ttm_device *bdev,
	struct ttm_resource *res;
 
	spin_lock(&bdev->lru_lock);
-	while ((res = list_first_entry_or_null(list, typeof(*res), lru))) {
+	while ((res = ttm_lru_first_res_or_null(list))) {
		struct ttm_buffer_object *bo = res->bo;
 
		/* Take ref against racing releases once lru_lock is unlocked */
		if (!ttm_bo_get_unless_zero(bo))
			continue;
 
-		list_del_init(&res->lru);
+		list_del_init(&bo->resource->lru.link);
		spin_unlock(&bdev->lru_lock);
 
if (bo->ttm)
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 4a66b851b67d..db9a7a3717c4 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -70,8 +70,8 @@ void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
dma_resv_assert_held(pos->last->bo->base.resv);
 
man = ttm_manager_type(pos->first->bo->bdev, i);
-			list_bulk_move_tail(&man->lru[j], &pos->first->lru,
-					    &pos->last->lru);
+			list_bulk_move_tail(&man->lru[j], &pos->first->lru.link,
+					    &pos->last->lru.link);
}
}
 }
@@ -84,14 +84,38 @@ ttm_lru_bulk_move_pos(struct ttm_lru_bulk_move *bulk, struct ttm_resource *res)
	return &bulk->pos[res->mem_type][res->bo->priori

[PATCH v6 00/12] TTM shrinker helpers and xe buffer object shrinker

2024-07-03 Thread Thomas Hellström
This series implements TTM shrinker / eviction helpers and an xe bo
shrinker. It builds on two previous series, *and obsoletes these*. First

https://www.mail-archive.com/dri-devel@lists.freedesktop.org/msg484425.html

Second the previous TTM shrinker series

https://lore.kernel.org/linux-mm/b7491378-defd-4f1c-31e2-29e4c77e2...@amd.com/T/

Where the comment about layering
https://lore.kernel.org/linux-mm/b7491378-defd-4f1c-31e2-29e4c77e2...@amd.com/T/#ma918844aa8a6efe8768fdcda0c6590d5c93850c9

is now addressed, and this version also implements shmem objects for backup
rather than the direct swap-cache insertions used in the previous
series. It turns out that with per-page backup / shrinking, shmem objects
appear to work just as well as direct swap-cache insertions, with the
added benefit that the workaround introduced in the previous TTM shrinker
series to avoid running out of swap entries isn't really needed.

Patches 1-4 implement restartable LRU list iteration.

Patch 5 implements an LRU walker + resv locking helper.

Patch 6 moves TTM swapping over to the walker.

Patch 7 moves TTM eviction over to the walker.

Patch 8 could in theory be skipped, but introduces the possibility to easily
add or test multiple backup backends, like direct swap-cache
insertion or even files on fast dedicated nvme storage, for example.

Patch 9 introduces helpers in the ttm_pool code for page-by-page shrinking
and recovery. It avoids having to temporarily allocate a huge amount of
memory to be able to shrink a buffer object. It also introduces the
possibility to immediately write back pages if needed, since writeback
tends to be a bit delayed when left to kswapd.

Patch 10 adds a simple error injection to the above code to help increase
test coverage.

Patch 11 implements an xe bo shrinker and a common helper in TTM for
shrinking.

Patch 12 is what remains of a separate POC series for introducing drm_exec
locking in TTM; the RFC part, which touched both drm_exec and dma-buf, was
dropped in v4 (see the changelog below). What is kept here:

Patch 12 increases (in effect removes) the XE_PL_TT watermark.

v2:
- Squash obsolete revision history in the patch commit messages.
- Fix a couple of review comments by Christian
- Don't store the mem_type in the TTM managers but in the
  resource cursor.
- Rename introduced TTM *back_up* function names to *backup*
- Add ttm pool recovery fault injection.
- Shrinker xe kunit test
- Various bugfixes

v3:
- Address some review comments from Matthew Brost and Christian König.
- Use the restartable LRU walk for TTM swapping and eviction.
- Provide a POC drm_exec locking implementation for exhaustive
  eviction. (Christian König).

v4:
- Remove the RFC exhaustive eviction part. While the path to exhaustive
  eviction is pretty clear and demonstrated in v3, there is still some
  drm_exec work that needs to be agreed on and implemented.
- Add shrinker power management. On some hw we need to wake when shrinking.
- Fix the lru walker helper for -EALREADY errors.
- Add drm/xe: Increase the XE_PL_TT watermark.

v5:
- Update also TTM kunit tests
- Handle ghost- and zombie objects in the shrinker.
- A couple of compile- and UAF fixes reported by Kernel Build Robot and
  Dan Carpenter.

v6:
- Address review comments from Matthew Brost as detailed in patches
  4/12, 5/12, 6/12, 7/12, 8/12.

Cc: Somalapuram Amaranath 
Cc: Christian König 
Cc: Matthew Brost 
Cc: 

Thomas Hellström (12):
  drm/ttm: Allow TTM LRU list nodes of different types
  drm/ttm: Slightly clean up LRU list iteration
  drm/ttm: Use LRU hitches
  drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist
moves
  drm/ttm: Provide a generic LRU walker helper
  drm/ttm: Use the LRU walker helper for swapping
  drm/ttm: Use the LRU walker for eviction
  drm/ttm: Add a virtual base class for graphics memory backup
  drm/ttm/pool: Provide a helper to shrink pages
  drm/ttm: Use fault-injection to test error paths
  drm/ttm, drm/xe: Add a shrinker for xe bos
  drm/xe: Increase the XE_PL_TT watermark

 drivers/gpu/drm/Kconfig   |  10 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   4 +
 drivers/gpu/drm/ttm/Makefile  |   2 +-
 drivers/gpu/drm/ttm/tests/ttm_bo_test.c   |   6 +-
 drivers/gpu/drm/ttm/tests/ttm_resource_test.c |   2 +-
 drivers/gpu/drm/ttm/ttm_backup_shmem.c| 139 ++
 drivers/gpu/drm/ttm/ttm_bo.c  | 458 --
 drivers/gpu/drm/ttm/ttm_bo_util.c | 223 +
 drivers/gpu/drm/ttm/ttm_device.c  |  29 +-
 drivers/gpu/drm/ttm/ttm_pool.c| 412 +++-
 drivers/gpu/drm/ttm/ttm_resource.c| 268 --
 drivers/gpu/drm/ttm/ttm_tt.c  |  37 ++
 drivers/gpu/drm/xe/Makefile   |   1 +
 drivers/gpu/drm/xe/tests/xe_bo.c  | 118 +
 drivers/gpu/drm/xe/tests/xe_bo_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_bo_test.h |   1 +
 drivers/gpu/drm/xe/xe_bo.c| 155 +-
 drivers

Re: Using drm_exec for TTMs BO eviction

2024-07-03 Thread Thomas Hellström
On Wed, 2024-07-03 at 16:30 +0200, Christian König wrote:
> On 03.07.24 at 15:59, Thomas Hellström wrote:
> > On Wed, 2024-07-03 at 15:53 +0200, Thomas Hellström wrote:
> > > On Wed, 2024-07-03 at 15:40 +0200, Thomas Hellström wrote:
> > > > Hi, Christian,
> > > > 
> > > > On Wed, 2024-07-03 at 15:25 +0200, Christian König wrote:
> > > > > Hi guys,
> > > > > 
> > > > > We recently ran into a problem with deadlocks during eviction,
> > > > > and a while back Thomas worked on a patch set that was going in
> > > > > the direction of solving this.
> > > > > 
> > > > > So I simplified it to just the functionality needed to resolve
> > > > > this issue. The resulting patch set is just the first step of
> > > > > using drm_exec in TTM for locking BOs during eviction.
> > > > > 
> > > > > Should a deadlock happen, the drm_exec object is now used to
> > > > > resolve it and prelock the contended object. This approach
> > > > > solves the ENOMEM issue on contending evictions quite nicely.
> > > > > 
> > > > > Please review and comment,
> > > > > Christian.
> > > > Overall it looks sane, but I think it makes sense to review and
> > > > land the part of the shrinker series first that touches this
> > > > eviction path and gets rid of a lot of code that's hard to
> > > > understand, and simplifies the locking a lot. (That part doesn't
> > > > touch drm_exec.) It has been pending review for some time.
> 
> That's actually exactly what I wanted to avoid.

> 
> > > > 
> > > > I don't think it's correct to bypass that. Then we could work
> > > > out
> > > > the
> > > > drm_exec implications.
> > > It's
> > > 
> > > https://patchwork.freedesktop.org/series/131815/
> > > 
> > > And in particular patch 7 there brings in the restartable LRU
> > > functionality and simplifies eviction immensely, and would make the
> > > usage of this patchset's rudimentary drm_exec support easier to
> > > understand and review.
> 
> Yeah, I've seen Matthew's comments on that stuff.
> 
> Looked 99% sane to me the last time I checked; the only thing I'm
> still not very keen on is the bulk and cursor interaction.
> 
> > > 
> > > /Thomas
> > Hm. I actually think all review comments have been sorted out up to
> > that patch, so what's missing is a resend of the new version, RB
> > from
> > Matt and Review / Ack from you, then that part could be partially
> > merged.
> 
> Going to take another look at that.

I'll resend the latest version.
/Thomas


> 
> Regards,
> Christian.
> 
> > 
> > /Thomas
> > 
> > 
> > 
> > > 
> > > > /Thomas
> > > > 
> > > > 
> > > > > 
> 



Re: Using drm_exec for TTMs BO eviction

2024-07-03 Thread Thomas Hellström
On Wed, 2024-07-03 at 15:53 +0200, Thomas Hellström wrote:
> On Wed, 2024-07-03 at 15:40 +0200, Thomas Hellström wrote:
> > Hi, Christian,
> > 
> > On Wed, 2024-07-03 at 15:25 +0200, Christian König wrote:
> > > Hi guys,
> > > 
> > > We recently ran into a problem with deadlocks during eviction, and
> > > a while back Thomas worked on a patch set that was going in the
> > > direction of solving this.
> > > 
> > > So I simplified it to just the functionality needed to resolve
> > > this issue. The resulting patch set is just the first step of
> > > using drm_exec in TTM for locking BOs during eviction.
> > > 
> > > Should a deadlock happen, the drm_exec object is now used to
> > > resolve it and prelock the contended object. This approach solves
> > > the ENOMEM issue on contending evictions quite nicely.
> > > 
> > > Please review and comment,
> > > Christian.
> > 
> > Overall it looks sane, but I think it makes sense to review and
> > land the part of the shrinker series first that touches this
> > eviction path and gets rid of a lot of code that's hard to
> > understand, and simplifies the locking a lot. (That part doesn't
> > touch drm_exec.) It has been pending review for some time.
> > 
> > I don't think it's correct to bypass that. Then we could work out
> > the
> > drm_exec implications.
> 
> It's
> 
> https://patchwork.freedesktop.org/series/131815/
> 
> And in particular patch 7 there brings in the restartable LRU
> functionality and simplifies eviction immensely, and would make the
> usage of this patchset's rudimentary drm_exec support easier to
> understand and review.
> 
> /Thomas

Hm. I actually think all review comments have been sorted out up to
that patch, so what's missing is a resend of the new version, RB from
Matt and Review / Ack from you, then that part could be partially
merged.

/Thomas



> 
> 
> > 
> > /Thomas
> > 
> > 
> > > 
> > > 
> > 
> 



Re: Using drm_exec for TTMs BO eviction

2024-07-03 Thread Thomas Hellström
On Wed, 2024-07-03 at 15:40 +0200, Thomas Hellström wrote:
> Hi, Christian,
> 
> On Wed, 2024-07-03 at 15:25 +0200, Christian König wrote:
> > Hi guys,
> > 
> > We recently ran into a problem with deadlocks during eviction, and
> > a while back Thomas worked on a patch set that was going in the
> > direction of solving this.
> > 
> > So I simplified it to just the functionality needed to resolve this
> > issue. The resulting patch set is just the first step of
> > using drm_exec in TTM for locking BOs during eviction.
> > 
> > Should a deadlock happen, the drm_exec object is now used to resolve
> > it and prelock the contended object. This approach solves the
> > ENOMEM issue on contending evictions quite nicely.
> > 
> > Please review and comment,
> > Christian.
> 
> Overall it looks sane, but I think it makes sense to review and land
> the part of the shrinker series first that touches this eviction path
> and gets rid of a lot of code that's hard to understand, and
> simplifies the locking a lot. (That part doesn't touch drm_exec.) It
> has been pending review for some time.
> 
> I don't think it's correct to bypass that. Then we could work out the
> drm_exec implications.

It's

https://patchwork.freedesktop.org/series/131815/

And in particular patch 7 there brings in the restartable LRU
functionality and simplifies eviction immensely, and would make the
usage of this patchset's rudimentary drm_exec support easier to
understand and review.

/Thomas


> 
> /Thomas
> 
> 
> > 
> > 
> 



Re: Using drm_exec for TTMs BO eviction

2024-07-03 Thread Thomas Hellström
Hi, Christian,

On Wed, 2024-07-03 at 15:25 +0200, Christian König wrote:
> Hi guys,
> 
> We recently ran into a problem with deadlocks during eviction, and
> a while back Thomas worked on a patch set that was going in the
> direction of solving this.
> 
> So I simplified it to just the functionality needed to resolve this
> issue. The resulting patch set is just the first step of
> using drm_exec in TTM for locking BOs during eviction.
> 
> Should a deadlock happen, the drm_exec object is now used to resolve
> it and prelock the contended object. This approach solves the
> ENOMEM issue on contending evictions quite nicely.
> 
> Please review and comment,
> Christian.

Overall it looks sane, but I think it makes sense to review and land
the part of the shrinker series first that touches this eviction path
and gets rid of a lot of code that's hard to understand, and simplifies
the locking a lot. (That part doesn't touch drm_exec.) It has been
pending review for some time.

I don't think it's correct to bypass that. Then we could work out the
drm_exec implications.

/Thomas


> 
> 



Re: [Linaro-mm-sig] Re: dma_buf_detach lockdep splat

2024-07-01 Thread Thomas Hellström
On Fri, 2024-06-28 at 20:06 +0200, Daniel Vetter wrote:
> On Thu, Jun 27, 2024 at 02:18:44PM +0200, Thomas Hellström wrote:
> > On Thu, 2024-06-27 at 10:04 +0200, Daniel Vetter wrote:
> > > On Wed, Jun 26, 2024 at 05:58:02PM +0200, Thomas Hellström wrote:
> > > > Hi!
> > > > 
> > > > I'm seeing the below lockdep splat 1) with the xe driver in an
> > > > imported
> > > > dma-buf object destruction path.
> > > > 
> > > > It's not because we hold the dma_resv lock at that point, but
> > > > rather
> > > > because we hold *another* dma_resv lock at that point, and the
> > > > dma_resv
> > > > detach happens when the object is idle, in this case it was
> > > > idle at
> > > > the
> > > > final put(), and dma_buf_detach() is called in the putting
> > > > process.
> > > > 
> > > > Holding another dma-buf lock might happen as part of
> > > > drm_exec_unlock_all, or simply if the wider vm dma_resv was
> > > > held at
> > > > object put time, so it's not an uncommon pattern, even if the
> > > > drm_exec
> > > > instance can be fixed by putting all bos after unlocking them
> > > > all.
> > > > 
> > > > Two solutions coming to mind here:
> > > > 
> > > > 1) Provide a dma_buf_detach_locked()
> > > 
> > > This smells way too much like the endless headaches we had with
> > > drm_gem_object_put_locked and friends against
> > > drm_device.struct_mutex. Or
> > > I'm not understanding what you're doing, because I'm pretty sure
> > > you
> > > have
> > > to take the dma_resv lock on final put() of imported objects.
> > > Because
> > > that
> > > final put() is of the import wrapper, the exporter (and other
> > > importers)
> > > can still get at that object and so dma_resv_lock is very much
> > > needed.
> > 
> > Yeah, the TTM final put looks like
> > 
> > if (!dma_resv_trylock() || !idle)
> > queue_work(final_destruction);
> > 
> > dma_resv_unlock();
> > dma_buf_detach(); <--- lockdep splat
> > 
> > Here's where a dma_buf_detach_locked() would've made sense before
> > the
> > dma_resv_unlock().
> > 
> > But if you think this will cause grief, I'm completely fine with
> > fixing this in TTM by always taking the deferring path.
> 
> Oh I misunderstood what you meant, I thought you wanted to do a huge
> exercise in passing the "do we know we're locked" flag all the way
> through entire callchains to exporters.
> 
> If it's just so that the fastpath of bypassing the worker can
> function for
> imported buffers, then I think that's fine. As long as we just punt
> to the
> worker if we can't get the lock.

OK, TBH, the driver would need a drm_prime_gem_destroy_locked() as well,
since that's the function that calls dma_buf_detach(). But I think
it's worth it anyway: if we just modify TTM to always take the
delayed destruction path, I figure much code will come to depend on that
and it will be invasive to update later.

I'll take a quick stab at that to see how ugly it becomes.
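
As a rough sketch, such a helper could look like this (both
drm_prime_gem_destroy_locked() and dma_buf_detach_locked() are hypothetical
here; the signatures and the unmap step are my assumptions, not existing
API):

void drm_prime_gem_destroy_locked(struct drm_gem_object *obj,
				  struct sg_table *sg)
{
	struct dma_buf_attachment *attach = obj->import_attach;
	struct dma_buf *dma_buf = attach->dmabuf;

	dma_resv_assert_held(dma_buf->resv);
	if (sg)
		dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
	/* Detach while the reservation lock is still held. */
	dma_buf_detach_locked(dma_buf, attach);
	dma_buf_put(dma_buf);
}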

/Thomas


> -Sima



[PATCH] drm/ttm: Always take the bo delayed cleanup path for imported bos

2024-06-28 Thread Thomas Hellström
000ca
[22982.117195] RAX: fe00 RBX:  RCX: 7f943d267169
[22982.117196] RDX:  RSI: 0189 RDI: 5622f89579d0
[22982.117197] RBP: 7f9430bffcb0 R08:  R09: 
[22982.117198] R10:  R11: 0246 R12: 
[22982.117199] R13:  R14:  R15: 5622f89579d0
[22982.117202]  

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Cc: Christian König 
Cc: Daniel Vetter 
Cc: dri-devel@lists.freedesktop.org
Cc: intel...@lists.freedesktop.org
Cc:  # v6.8+
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6396dece0db1..2427be8bc97f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -346,6 +346,7 @@ static void ttm_bo_release(struct kref *kref)
	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) ||
	    (want_init_on_free() && (bo->ttm != NULL)) ||
+	    bo->type == ttm_bo_type_sg ||
	    !dma_resv_trylock(bo->base.resv)) {
		/* The BO is not idle, resurrect it for delayed destroy */
		ttm_bo_flush_all_fences(bo);
-- 
2.44.0



Re: [Linaro-mm-sig] Re: dma_buf_detach lockdep splat

2024-06-27 Thread Thomas Hellström
On Thu, 2024-06-27 at 10:04 +0200, Daniel Vetter wrote:
> On Wed, Jun 26, 2024 at 05:58:02PM +0200, Thomas Hellström wrote:
> > Hi!
> > 
> > I'm seeing the below lockdep splat 1) with the xe driver in an
> > imported
> > dma-buf object destruction path.
> > 
> > It's not because we hold the dma_resv lock at that point, but
> > rather
> > because we hold *another* dma_resv lock at that point, and the
> > dma_resv
> > detach happens when the object is idle, in this case it was idle at
> > the
> > final put(), and dma_buf_detach() is called in the putting process.
> > 
> > Holding another dma-buf lock might happen as part of
> > drm_exec_unlock_all, or simply if the wider vm dma_resv was held at
> > object put time, so it's not an uncommon pattern, even if the
> > drm_exec
> > instance can be fixed by putting all bos after unlocking them all.
> > 
> > Two solutions coming to mind here:
> > 
> > 1) Provide a dma_buf_detach_locked()
> 
> This smells way too much like the endless headaches we had with
> drm_gem_object_put_locked and friends against
> drm_device.struct_mutex. Or
> I'm not understanding what you're doing, because I'm pretty sure you
> have
> to take the dma_resv lock on final put() of imported objects. Because
> that
> final put() is of the import wrapper, the exporter (and other
> importers)
> can still get at that object and so dma_resv_lock is very much
> needed.

Yeah, the TTM final put looks like

if (!dma_resv_trylock() || !idle)
queue_work(final_destruction);

dma_resv_unlock();
dma_buf_detach(); <--- lockdep splat

Here's where a dma_buf_detach_locked() would've made sense before the
dma_resv_unlock().
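
In code, the idea would be roughly the following sketch
(dma_buf_detach_locked() doesn't exist today, and the wq / work names are
placeholders for the existing TTM delayed-destroy machinery):

if (!dma_resv_trylock(bo->base.resv) || !idle) {
	queue_work(wq, &bo->delayed_delete);
} else {
	/* Detach before dropping the reservation lock. */
	dma_buf_detach_locked(dma_buf, attach);
	dma_resv_unlock(bo->base.resv);
}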

But if you think this will cause grief, I'm completely fine with
fixing this in TTM by always taking the deferring path.

/Thomas
 

> 
> Or it's a completely different final put(), but I have no idea how
> you get
> that on an imported dma_buf.
> 
> > 2) Have TTM always take the delayed delete path for imported dma-
> > buf
> > objects.
> > 
> > I'd prefer 1) since I think the correct place to call this is in
> > the
> > TTM callback delete_mem_notify() where the bo is already locked,
> > and I
> > figure non-TTM gem backends may come to suffer from the same
> > problem.
> > 
> > Opinions, suggestions?
> 
> Imo 2) or trying to push the object puts outside of the
> dma_resv_lock. The
> latter is imo natural, since usually you grab references, then lock.
> And
> this even holds for at least the slow path of lru eviction, because
> you
> need to drop all locks and then do a ww_mutex_lock_slow, and that
> requires
> that you can hold references to unlocked objects.
> 
> But 2) alone is imo fine, dma_buf have become really big objects that
> go
> across drivers, extremely similar to struct file, and that is doing
> the
> delayed final put unconditionally since years too, using task_work.
> It's
> simply a solid design.
> 
> Cheers, Sima
> 
> > [1]
> > [   99.136161] 
> > [   99.136162] WARNING: possible recursive locking detected
> > [   99.136163] 6.10.0-rc2+ #6 Tainted: G U    
> > [   99.136165] 
> > [   99.136166] glxgears:sh0/4675 is trying to acquire lock:
> > [   99.136167] 9967dcdd91a8 (reservation_ww_class_mutex){+.+.}-
> > {3:3}, at: dma_buf_detach+0x3b/0xf0
> > [   99.136184] 
> >    but task is already holding lock:
> > [   99.136186] 9967d8c145a8 (reservation_ww_class_mutex){+.+.}-
> > {3:3}, at: drm_exec_lock_obj+0x49/0x2b0 [drm_exec]
> > [   99.136191] 
> >    other info that might help us debug this:
> > [   99.136192]  Possible unsafe locking scenario:
> > 
> > [   99.136194]    CPU0
> > [   99.136194]    
> > [   99.136195]   lock(reservation_ww_class_mutex);
> > [   99.136197]   lock(reservation_ww_class_mutex);
> > [   99.136199] 
> >     *** DEADLOCK ***
> > 
> > [   99.136199]  May be due to missing lock nesting notation
> > 
> > [   99.136200] 5 locks held by glxgears:sh0/4675:
> > [   99.136202]  #0: 9967d8c104c8 (>vm.lock){+.+.}-{3:3},
> > at:
> > xe_file_close+0xde/0x1c0 [xe]
> > [   99.136272]  #1: 9967d5bb7480 (>lock){}-{3:3}, at:
> > xe_vm_close_and_put+0x161/0x9b0 [xe]
> > [   99.136350]  #2: 9967ef88a970 (>lock){.+.+}-{3:3}, at:
> > xe_validation_ctx_init+0x6d/0x70 [xe]
> > [   99.136440]  #3: bd6a085577b8
> > (reservation_ww_class_acquire){+.+

Re: [Linaro-mm-sig] Re: dma_buf_detach lockdep splat

2024-06-27 Thread Thomas Hellström
On Thu, 2024-06-27 at 10:25 +0200, Christian König wrote:
> On 27.06.24 at 10:04, Daniel Vetter wrote:
> > On Wed, Jun 26, 2024 at 05:58:02PM +0200, Thomas Hellström wrote:
> > > Hi!
> > > 
> > > I'm seeing the below lockdep splat 1) with the xe driver in an
> > > imported
> > > dma-buf object destruction path.
> 
> Mhm strange.
> 
> > > 
> > > It's not because we hold the dma_resv lock at that point, but
> > > rather
> > > because we hold *another* dma_resv lock at that point, and the
> > > dma_resv
> > > detach happens when the object is idle, in this case it was idle
> > > at the
> > > final put(), and dma_buf_detach() is called in the putting
> > > process.
> > > 
> > > Holding another dma-buf lock might happen as part of
> > > drm_exec_unlock_all, or simply if the wider vm dma_resv was held
> > > at
> > > object put time, so it's not an uncommon pattern, even if the
> > > drm_exec
> > > instance can be fixed by putting all bos after unlocking them
> > > all.
> > > 
> > > Two solutions coming to mind here:
> > > 
> > > 1) Provide a dma_buf_detach_locked()
> > This smells way too much like the endless headaches we had with
> > drm_gem_object_put_locked and friends against
> > drm_device.struct_mutex. Or
> > I'm not understanding what you're doing, because I'm pretty sure
> > you have
> > to take the dma_resv lock on final put() of imported objects.
> > Because that
> > final put() is of the import wrapper, the exporter (and other
> > importers)
> > can still get at that object and so dma_resv_lock is very much
> > needed.
> > 
> > Or it's a completely different final put(), but I have no idea how
> > you get
> > that on an imported dma_buf.
> > 
> > > 2) Have TTM always take the delayed delete path for imported dma-
> > > buf
> > > objects.
> > > 
> > > I'd prefer 1) since I think the correct place to call this is in
> > > the
> > > TTM callback delete_mem_notify() where the bo is already locked,
> > > and I
> > > figure non-TTM gem backends may come to suffer from the same
> > > problem.
> > > 
> > > Opinions, suggestions?
> > Imo 2) or trying to push the object puts outside of the
> > dma_resv_lock.
> 
> IIRC I've stumbled over this issue before with TTM but thought that
> I'd fixed it.
> 
> I mean no objections from my side to change drm_exec_fini() to
> something 
> like this:
> 
> drm_exec_for_each_locked_object_reverse(exec, index, obj)
>  dma_resv_unlock(obj->resv);
> 
> drm_exec_for_each_locked_object_reverse(exec, index, obj)
>  drm_gem_object_put(obj);
> 
> but in general, that the last reference is dropped while holding a
> different reservation object is nothing special. For example, that
> happens all the time in TTM's eviction code.
> 
> So at least for TTM I would say we should move cleanup of imported
> BOs 
> to the worker. But not sure if that covers everything.

I'm fine with this. It covers all the TTM use-cases, I think.
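
Spelled out, that reordering would make drm_exec_fini() unlock everything
first and only then drop the references (a simplified sketch of the snippet
quoted above; the real function would also need to handle the prelocked
object and free the objects array):

	struct drm_gem_object *obj;
	unsigned long index;

	/* First drop every reservation lock... */
	drm_exec_for_each_locked_object_reverse(exec, index, obj)
		dma_resv_unlock(obj->resv);

	/* ...then drop the references, so a final put can never run
	 * while another object's reservation lock is still held.
	 */
	drm_exec_for_each_locked_object_reverse(exec, index, obj)
		drm_gem_object_put(obj);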

Thanks,
/Thomas


> 
> Regards,
> Christian.
> 
> >   The
> > latter is imo natural, since usually you grab references, then
> > lock. And
> > this even holds for at least the slow path of lru eviction, because
> > you
> > need to drop all locks and then do a ww_mutex_lock_slow, and that
> > requires
> > that you can hold references to unlocked objects.
> > 
> > But 2) alone is imo fine, dma_buf have become really big objects
> > that go
> > across drivers, extremely similar to struct file, and that is doing
> > the
> > delayed final put unconditionally since years too, using task_work.
> > It's
> > simply a solid design.
> > 
> > Cheers, Sima
> > 
> > > [1]
> > > [   99.136161] 
> > > [   99.136162] WARNING: possible recursive locking detected
> > > [   99.136163] 6.10.0-rc2+ #6 Tainted: G U
> > > [   99.136165] 
> > > [   99.136166] glxgears:sh0/4675 is trying to acquire lock:
> > > [   99.136167] 9967dcdd91a8
> > > (reservation_ww_class_mutex){+.+.}-
> > > {3:3}, at: dma_buf_detach+0x3b/0xf0
> > > [   99.136184]
> > >     but task is already holding lock:
> > > [   99.136186] 9967d8c145a8
> > > (reservation_ww_class_mutex){+.+.}-
> > > {3:3},

Re: [PATCH v3 2/2] drm/xe/lnl: Offload system clear page activity to GPU

2024-06-27 Thread Thomas Hellström
Hi Nirmoy

On Mon, 2024-06-24 at 16:14 +0200, Nirmoy Das wrote:
> On LNL, because of flat CCS, the driver creates a migrate job to clear
> CCS metadata. Extend that to also clear system pages using the GPU.
> Inform TTM to allocate pages without __GFP_ZERO to avoid double page
> clearing, by clearing out the TTM_TT_FLAG_ZERO_ALLOC flag, and set
> TTM_TT_FLAG_CLEARED_ON_FREE while freeing to skip the ttm pool's
> clear-on-free, as XE now takes care of clearing pages.
> 
> To test the patch, I created a small test that tries to submit a job
> after binding various sizes of buffers, which shows good gains for
> larger buffers. For smaller buffer sizes the result is not very
> reliable, as the results vary a lot.

Some concerns below,

also a big security concern. 

The CCS clearing occurs when the bo is moved to TT. But there are
situations in which the bo is created and populated in system memory.
For example, if the bo is created using
DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING and then mmap'd, it won't get
cleared. Since we don't have a dma mapping of the bo at that time, we
must revert to cpu clearing when/if that happens.

> 
> Without the patch
> sudo  ~/igt-gpu-tools/build/tests/xe_exec_store --run basic-store-
> benchmark
> IGT-Version: 1.28-g2ed908c0b (x86_64) (Linux: 6.10.0-rc2-xe+ x86_64)
> Using IGT_SRANDOM=1719237905 for randomisation
> Opened device: /dev/dri/card0
> Starting subtest: basic-store-benchmark
> Starting dynamic subtest: WC
> Dynamic subtest WC: SUCCESS (0.000s)
> Time taken for size SZ_4K: 9493 us
> Time taken for size SZ_2M: 5503 us
> Time taken for size SZ_64M: 13016 us
> Time taken for size SZ_128M: 29464 us
> Time taken for size SZ_256M: 38408 us
> Time taken for size SZ_1G: 148758 us
> Starting dynamic subtest: WB
> Dynamic subtest WB: SUCCESS (0.000s)
> Time taken for size SZ_4K: 3889 us
> Time taken for size SZ_2M: 6091 us
> Time taken for size SZ_64M: 20920 us
> Time taken for size SZ_128M: 32394 us
> Time taken for size SZ_256M: 61710 us
> Time taken for size SZ_1G: 215437 us
> Subtest basic-store-benchmark: SUCCESS (0.589s)
> 
> With the patch:
> sudo  ~/igt-gpu-tools/build/tests/xe_exec_store --run basic-store-
> benchmark
> IGT-Version: 1.28-g2ed908c0b (x86_64) (Linux: 6.10.0-rc2-xe+ x86_64)
> Using IGT_SRANDOM=1719238062 for randomisation
> Opened device: /dev/dri/card0
> Starting subtest: basic-store-benchmark
> Starting dynamic subtest: WC
> Dynamic subtest WC: SUCCESS (0.000s)
> Time taken for size SZ_4K: 11803 us
> Time taken for size SZ_2M: 4237 us
> Time taken for size SZ_64M: 8649 us
> Time taken for size SZ_128M: 14682 us
> Time taken for size SZ_256M: 22156 us
> Time taken for size SZ_1G: 74457 us
> Starting dynamic subtest: WB
> Dynamic subtest WB: SUCCESS (0.000s)
> Time taken for size SZ_4K: 5129 us
> Time taken for size SZ_2M: 12563 us
> Time taken for size SZ_64M: 14860 us
> Time taken for size SZ_128M: 26064 us
> Time taken for size SZ_256M: 47167 us
> Time taken for size SZ_1G: 170304 us
> Subtest basic-store-benchmark: SUCCESS (0.417s)
> 
> With the patch and init_on_alloc=0
> sudo  ~/igt-gpu-tools/build/tests/xe_exec_store --run basic-store-
> benchmark
> IGT-Version: 1.28-g2ed908c0b (x86_64) (Linux: 6.10.0-rc2-xe+ x86_64)
> Using IGT_SRANDOM=1719238219 for randomisation
> Opened device: /dev/dri/card0
> Starting subtest: basic-store-benchmark
> Starting dynamic subtest: WC
> Dynamic subtest WC: SUCCESS (0.000s)
> Time taken for size SZ_4K: 4803 us
> Time taken for size SZ_2M: 9212 us
> Time taken for size SZ_64M: 9643 us
> Time taken for size SZ_128M: 13479 us
> Time taken for size SZ_256M: 22429 us
> Time taken for size SZ_1G: 83110 us
> Starting dynamic subtest: WB
> Dynamic subtest WB: SUCCESS (0.000s)
> Time taken for size SZ_4K: 4003 us
> Time taken for size SZ_2M: 4443 us
> Time taken for size SZ_64M: 12960 us
> Time taken for size SZ_128M: 13741 us
> Time taken for size SZ_256M: 26841 us
> Time taken for size SZ_1G: 84746 us
> Subtest basic-store-benchmark: SUCCESS (0.290s)
> 
> v2: Handle regression on dgfx(Himal)
>     Update commit message as no ttm API changes needed.
> v3: Fix Kunit test.
> 
> Cc: Himal Prasad Ghimiray 
> Cc: Matthew Auld 
> Cc: "Thomas Hellström" 
> Signed-off-by: Nirmoy Das 
> ---
>  drivers/gpu/drm/xe/xe_bo.c   | 11 +++
>  drivers/gpu/drm/xe/xe_device.c   |  7 +++
>  drivers/gpu/drm/xe/xe_device_types.h |  2 ++
>  drivers/gpu/drm/xe/xe_migrate.c  |  5 +++--
>  4 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 65c696966e96..a9ce4347a7d7 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu

dma_buf_detach lockdep splat

2024-06-26 Thread Thomas Hellström
Hi!

I'm seeing the below lockdep splat 1) with the xe driver in an imported
dma-buf object destruction path.

It's not because we hold the dma_resv lock at that point, but rather
because we hold *another* dma_resv lock at that point, and the dma_resv
detach happens when the object is idle, in this case it was idle at the
final put(), and dma_buf_detach() is called in the putting process.

Holding another dma-buf lock might happen as part of
drm_exec_unlock_all, or simply if the wider vm dma_resv was held at
object put time, so it's not an uncommon pattern, even if the drm_exec
instance can be fixed by putting all bos after unlocking them all.

Two solutions coming to mind here:

1) Provide a dma_buf_detach_locked()
2) Have TTM always take the delayed delete path for imported dma-buf
objects.

I'd prefer 1) since I think the correct place to call this is in the
TTM callback delete_mem_notify() where the bo is already locked, and I
figure non-TTM gem backends may come to suffer from the same problem.

Opinions, suggestions?
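
For reference, option 2) essentially boils down to one extra condition in
ttm_bo_release(); this is the fix later posted as "drm/ttm: Always take the
bo delayed cleanup path for imported bos", which also appears in this
archive:

	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) ||
	    (want_init_on_free() && (bo->ttm != NULL)) ||
	    bo->type == ttm_bo_type_sg || /* imported: always defer */
	    !dma_resv_trylock(bo->base.resv)) {
		/* The BO is not idle, resurrect it for delayed destroy */
		ttm_bo_flush_all_fences(bo);
		...
	}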

[1]
[   99.136161] 
[   99.136162] WARNING: possible recursive locking detected
[   99.136163] 6.10.0-rc2+ #6 Tainted: G U
[   99.136165] 
[   99.136166] glxgears:sh0/4675 is trying to acquire lock:
[   99.136167] 9967dcdd91a8 (reservation_ww_class_mutex){+.+.}-
{3:3}, at: dma_buf_detach+0x3b/0xf0
[   99.136184] 
   but task is already holding lock:
[   99.136186] 9967d8c145a8 (reservation_ww_class_mutex){+.+.}-
{3:3}, at: drm_exec_lock_obj+0x49/0x2b0 [drm_exec]
[   99.136191] 
   other info that might help us debug this:
[   99.136192]  Possible unsafe locking scenario:

[   99.136194]CPU0
[   99.136194]
[   99.136195]   lock(reservation_ww_class_mutex);
[   99.136197]   lock(reservation_ww_class_mutex);
[   99.136199] 
*** DEADLOCK ***

[   99.136199]  May be due to missing lock nesting notation

[   99.136200] 5 locks held by glxgears:sh0/4675:
[   99.136202]  #0: 9967d8c104c8 (>vm.lock){+.+.}-{3:3}, at:
xe_file_close+0xde/0x1c0 [xe]
[   99.136272]  #1: 9967d5bb7480 (>lock){}-{3:3}, at:
xe_vm_close_and_put+0x161/0x9b0 [xe]
[   99.136350]  #2: 9967ef88a970 (>lock){.+.+}-{3:3}, at:
xe_validation_ctx_init+0x6d/0x70 [xe]
[   99.136440]  #3: bd6a085577b8
(reservation_ww_class_acquire){+.+.}-{0:0}, at:
xe_vma_destroy_unlocked+0x7f/0xe0 [xe]
[   99.136546]  #4: 9967d8c145a8
(reservation_ww_class_mutex){+.+.}-{3:3}, at:
drm_exec_lock_obj+0x49/0x2b0 [drm_exec]
[   99.136552] 
   stack backtrace:
[   99.136553] CPU: 10 PID: 4675 Comm: glxgears:sh0 Tainted: G U  
6.10.0-rc2+ #6
[   99.136555] Hardware name: ASUS System Product Name/PRIME B560M-A
AC, BIOS 2001 02/01/2023
[   99.136557] Call Trace:
[   99.136558]  
[   99.136560]  dump_stack_lvl+0x77/0xb0
[   99.136564]  __lock_acquire+0x1232/0x2160
[   99.136569]  lock_acquire+0xcb/0x2d0
[   99.136570]  ? dma_buf_detach+0x3b/0xf0
[   99.136574]  ? __lock_acquire+0x417/0x2160
[   99.136577]  __ww_mutex_lock.constprop.0+0xd0/0x13b0
[   99.136580]  ? dma_buf_detach+0x3b/0xf0
[   99.136584]  ? dma_buf_detach+0x3b/0xf0
[   99.136588]  ? ww_mutex_lock+0x2b/0x90
[   99.136590]  ww_mutex_lock+0x2b/0x90
[   99.136592]  dma_buf_detach+0x3b/0xf0
[   99.136595]  drm_prime_gem_destroy+0x2f/0x40 [drm]
[   99.136638]  xe_ttm_bo_destroy+0x32/0x220 [xe]
[   99.136734]  ? __mutex_unlock_slowpath+0x3a/0x290
[   99.136738]  drm_exec_unlock_all+0xa1/0xd0 [drm_exec]
[   99.136741]  drm_exec_fini+0x12/0xb0 [drm_exec]
[   99.136743]  xe_validation_ctx_fini+0x15/0x40 [xe]
[   99.136848]  xe_vma_destroy_unlocked+0xb1/0xe0 [xe]
[   99.136954]  xe_vm_close_and_put+0x41a/0x9b0 [xe]
[   99.137056]  ? xa_find+0xe3/0x1e0
[   99.137060]  xe_file_close+0x10a/0x1c0 [xe]
[   99.137157]  drm_file_free+0x22a/0x280 [drm]
[   99.137193]  drm_release_noglobal+0x22/0x70 [drm]
[   99.137227]  __fput+0xf1/0x2d0
[   99.137231]  task_work_run+0x59/0x90
[   99.137235]  do_exit+0x330/0xb40
[   99.137238]  do_group_exit+0x36/0xa0
[   99.137241]  get_signal+0xbd2/0xbe0
[   99.137245]  arch_do_signal_or_restart+0x3e/0x240
[   99.137249]  syscall_exit_to_user_mode+0x1e7/0x290
[   99.137252]  do_syscall_64+0xa1/0x180
[   99.137255]  ? _raw_spin_unlock+0x23/0x40
[   99.137257]  ? look_up_lock_class+0x6f/0x120
[   99.137261]  ? __lock_acquire+0x417/0x2160
[   99.137264]  ? lock_acquire+0xcb/0x2d0
[   99.137266]  ? __set_task_comm+0x28/0x1e0
[   99.137268]  ? find_held_lock+0x2b/0x80
[   99.137271]  ? __set_task_comm+0xe1/0x1e0
[   99.137273]  ? lock_release+0xca/0x290
[   99.137277]  ? __do_sys_prctl+0x245/0xab0
[   99.137279]  ? lockdep_hardirqs_on_prepare+0xde/0x190
[   99.137281]  ? syscall_exit_to_user_mode+0xb0/0x290
[   99.137284]  ? do_syscall_64+0xa1/0x180
[   99.137286]  ? cpuset_cpus_allowed+0x36/0x140
[   99.137289]  ? find_held_lock+0x2b/0x80
[   99.137291]  ? 

Re: [PATCH v5 08/12] drm/ttm: Add a virtual base class for graphics memory backup

2024-06-24 Thread Thomas Hellström
On Mon, 2024-06-24 at 11:26 +0200, Thomas Hellström wrote:
> On Thu, 2024-06-20 at 15:17 +, Matthew Brost wrote:
> > On Tue, Jun 18, 2024 at 09:18:16AM +0200, Thomas Hellström wrote:
> > > Initially intended for experimenting with different backup
> > > solutions (shmem vs direct swap cache insertion), abstract
> > > the backup destination using a virtual base class.
> > > 
> > > Also provide a sample implementation for shmem.
> > > 
> > > While one could perhaps skip the abstraction when settling on a
> > > preferred backup solution, this functionality may actually
> > > come in handy for configurable dedicated graphics memory
> > > backup to fast nvme files or similar, without affecting
> > > swap-space. Could indeed be useful for VRAM backup on S4 and
> > > other cases.
> > > 
> > 
> > Implementation seemingly makes sense and matches other similar usages
> > of
> > shmem /
> > folio functions I could find in the kernel.
> > 
> > A few questions / nits below.
> > 
> > > v5:
> > > - Fix a UAF. (kernel test robot, Dan Carpenter)
> > > 
> > > Cc: Christian König 
> > > Cc: Somalapuram Amaranath 
> > > Cc: Matthew Brost 
> > > Cc: 
> > > Signed-off-by: Thomas Hellström
> > > 
> > > ---
> > >  drivers/gpu/drm/ttm/Makefile   |   2 +-
> > >  drivers/gpu/drm/ttm/ttm_backup_shmem.c | 139
> > > +
> > >  include/drm/ttm/ttm_backup.h   | 136
> > > 
> > >  3 files changed, 276 insertions(+), 1 deletion(-)
> > >  create mode 100644 drivers/gpu/drm/ttm/ttm_backup_shmem.c
> > >  create mode 100644 include/drm/ttm/ttm_backup.h
> > > 
> > > diff --git a/drivers/gpu/drm/ttm/Makefile
> > > b/drivers/gpu/drm/ttm/Makefile
> > > index dad298127226..5e980dd90e41 100644
> > > --- a/drivers/gpu/drm/ttm/Makefile
> > > +++ b/drivers/gpu/drm/ttm/Makefile
> > > @@ -4,7 +4,7 @@
> > >  
> > >  ttm-y := ttm_tt.o ttm_bo.o ttm_bo_util.o ttm_bo_vm.o
> > > ttm_module.o
> > > \
> > >   ttm_execbuf_util.o ttm_range_manager.o ttm_resource.o
> > > ttm_pool.o \
> > > - ttm_device.o ttm_sys_manager.o
> > > + ttm_device.o ttm_sys_manager.o ttm_backup_shmem.o
> > >  ttm-$(CONFIG_AGP) += ttm_agp_backend.o
> > >  
> > >  obj-$(CONFIG_DRM_TTM) += ttm.o
> > > diff --git a/drivers/gpu/drm/ttm/ttm_backup_shmem.c
> > > b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
> > > new file mode 100644
> > > index ..f5bc47734d71
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
> > > @@ -0,0 +1,139 @@
> > > +// SPDX-License-Identifier: MIT
> > > +/*
> > > + * Copyright © 2024 Intel Corporation
> > > + */
> > > +
> > > +#include 
> > > +#include 
> > > +
> > > +/**
> > > + * struct ttm_backup_shmem - A shmem based ttm_backup subclass.
> > > + * @backup: The base struct ttm_backup
> > > + * @filp: The associated shmem object
> > > + */
> > > +struct ttm_backup_shmem {
> > > + struct ttm_backup backup;
> > > + struct file *filp;
> > > +};
> > > +
> > > +static struct ttm_backup_shmem *to_backup_shmem(struct
> > > ttm_backup
> > > *backup)
> > > +{
> > > + return container_of(backup, struct ttm_backup_shmem,
> > > backup);
> > > +}
> > > +
> > > +static void ttm_backup_shmem_drop(struct ttm_backup *backup,
> > > unsigned long handle)
> > > +{
> > > + handle -= 1;
> > 
> > Can you explain the -1 / +1 usage to handle in this code? Is it to
> > test
> > that 'pgoff_t i' is indeed just a hint and return a different
> > handle?
> 
> It's IIRC because handle '0' has a reserved usage in the code, so
> the handle becomes the file address space offset + 1.
> 
> I need to double-check that so that I don't confuse this with the
> swap-space backend.

Ok, so the reason is that the direct swap-space backend uses
swp_entry_t as handles and returns the special swp_entry_t '0' as an
error indication. That is also used by the "backup_page()" callback and
documented there.
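
To make the convention concrete, the biasing amounts to the following
(helper names are mine for illustration; the series open-codes the +1 / -1
in the shmem backend):

static unsigned long ttm_backup_offset_to_handle(pgoff_t offset)
{
	/* Handle 0 is reserved as the error / "no backup" indication. */
	return (unsigned long)offset + 1;
}

static pgoff_t ttm_backup_handle_to_offset(unsigned long handle)
{
	return (pgoff_t)(handle - 1);
}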

/Thomas

> 
> 
> > 
> > > +	shmem_truncate_range(file_inode(to_backup_shmem(backup)->filp), handle,
> > > +			     handle + 1);
> > > +}
> > > +
> > > +static int ttm_backup_s

Re: [PATCH v5 08/12] drm/ttm: Add a virtual base class for graphics memory backup

2024-06-24 Thread Thomas Hellström
On Thu, 2024-06-20 at 15:17 +, Matthew Brost wrote:
> On Tue, Jun 18, 2024 at 09:18:16AM +0200, Thomas Hellström wrote:
> > Initially intended for experimenting with different backup
> > solutions (shmem vs direct swap cache insertion), abstract
> > the backup destination using a virtual base class.
> > 
> > Also provide a sample implementation for shmem.
> > 
> > While one could perhaps skip the abstraction when settling on a
> > preferred backup solution, this functionality may actually
> > come in handy for configurable dedicated graphics memory
> > backup to fast nvme files or similar, without affecting
> > swap-space. Could indeed be useful for VRAM backup on S4 and
> > other cases.
> > 
> 
> Implementation seemingly makes sense and matches other similar usages of
> shmem /
> folio functions I could find in the kernel.
> 
> A few questions / nits below.
> 
> > v5:
> > - Fix a UAF. (kernel test robot, Dan Carpenter)
> > 
> > Cc: Christian König 
> > Cc: Somalapuram Amaranath 
> > Cc: Matthew Brost 
> > Cc: 
> > Signed-off-by: Thomas Hellström 
> > ---
> >  drivers/gpu/drm/ttm/Makefile   |   2 +-
> >  drivers/gpu/drm/ttm/ttm_backup_shmem.c | 139
> > +
> >  include/drm/ttm/ttm_backup.h   | 136
> > 
> >  3 files changed, 276 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/ttm/ttm_backup_shmem.c
> >  create mode 100644 include/drm/ttm/ttm_backup.h
> > 
> > diff --git a/drivers/gpu/drm/ttm/Makefile
> > b/drivers/gpu/drm/ttm/Makefile
> > index dad298127226..5e980dd90e41 100644
> > --- a/drivers/gpu/drm/ttm/Makefile
> > +++ b/drivers/gpu/drm/ttm/Makefile
> > @@ -4,7 +4,7 @@
> >  
> >  ttm-y := ttm_tt.o ttm_bo.o ttm_bo_util.o ttm_bo_vm.o ttm_module.o
> > \
> >     ttm_execbuf_util.o ttm_range_manager.o ttm_resource.o
> > ttm_pool.o \
> > -   ttm_device.o ttm_sys_manager.o
> > +   ttm_device.o ttm_sys_manager.o ttm_backup_shmem.o
> >  ttm-$(CONFIG_AGP) += ttm_agp_backend.o
> >  
> >  obj-$(CONFIG_DRM_TTM) += ttm.o
> > diff --git a/drivers/gpu/drm/ttm/ttm_backup_shmem.c
> > b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
> > new file mode 100644
> > index ..f5bc47734d71
> > --- /dev/null
> > +++ b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
> > @@ -0,0 +1,139 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2024 Intel Corporation
> > + */
> > +
> > +#include 
> > +#include 
> > +
> > +/**
> > + * struct ttm_backup_shmem - A shmem based ttm_backup subclass.
> > + * @backup: The base struct ttm_backup
> > + * @filp: The associated shmem object
> > + */
> > +struct ttm_backup_shmem {
> > +   struct ttm_backup backup;
> > +   struct file *filp;
> > +};
> > +
> > +static struct ttm_backup_shmem *to_backup_shmem(struct ttm_backup
> > *backup)
> > +{
> > +   return container_of(backup, struct ttm_backup_shmem,
> > backup);
> > +}
> > +
> > +static void ttm_backup_shmem_drop(struct ttm_backup *backup,
> > unsigned long handle)
> > +{
> > +   handle -= 1;
> 
> Can you explain the -1 / +1 usage to handle in this code? Is it to
> test
> that 'pgoff_t i' is indeed just a hint and return a different handle?

It's IIRC because handle '0' has a reserved usage in the code, so
the handle becomes the file address space offset + 1.

I need to double-check that so that I don't confuse this with the
swap-space backend.


> 
> > +	shmem_truncate_range(file_inode(to_backup_shmem(backup)->filp), handle,
> > +			     handle + 1);
> > +}
> > +
> > +static int ttm_backup_shmem_copy_page(struct ttm_backup *backup,
> > struct page *dst,
> > +     unsigned long handle, bool
> > killable)
> 
> In the vfunc definition 'killable' is named 'intr'. I'd keep the
> naming
> consistent.

Sure.


> 
> > +{
> > +   struct file *filp = to_backup_shmem(backup)->filp;
> > +   struct address_space *mapping = filp->f_mapping;
> > +   struct folio *from_folio;
> > +
> > +   handle -= 1;
> > +   from_folio = shmem_read_folio(mapping, handle);
> > +   if (IS_ERR(from_folio))
> > +   return PTR_ERR(from_folio);
> > +
> > +   /* Note: Use drm_memcpy_from_wc? */
> > +   copy_highpage(dst, folio_file_page(from_folio, handle));
> > +   folio_put(from_folio);
> > +
> > +   return 0;
> > +}
> > +

Re: [PATCH v5 07/12] drm/ttm: Use the LRU walker for eviction

2024-06-24 Thread Thomas Hellström
On Wed, 2024-06-19 at 23:33 +, Matthew Brost wrote:
> On Tue, Jun 18, 2024 at 09:18:15AM +0200, Thomas Hellström wrote:
> > Use the LRU walker for eviction. This helps
> > remove a lot of code with weird locking
> > semantics.
> > 
> > The functionality is slightly changed so that
> > when trylocked buffer objects are exhausted, we
> > continue to interleave walks with ticket-locks while
> > there is still progress made. The list walks are
> > not restarted in-between evictions.
> > 
> > Also provide a separate ttm_bo_evict_first()
> > function for its single user. The context of that
> > user allows sleeping dma_resv locks.
> > 
> > Cc: Christian König 
> > Cc: Somalapuram Amaranath 
> > Cc: Matthew Brost 
> > Cc: 
> > Signed-off-by: Thomas Hellström 
> > ---
> >  drivers/gpu/drm/ttm/ttm_bo.c   | 350 -
> > 
> >  drivers/gpu/drm/ttm/ttm_resource.c |  20 +-
> >  include/drm/ttm/ttm_bo.h   |   8 +-
> >  3 files changed, 145 insertions(+), 233 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> > b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 63a91b77f7da..316afe19a325 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -224,80 +224,6 @@ static void ttm_bo_flush_all_fences(struct
> > ttm_buffer_object *bo)
> >     dma_resv_iter_end();
> >  }
> >  
> > -/**
> > - * ttm_bo_cleanup_refs
> > - * If bo idle, remove from lru lists, and unref.
> > - * If not idle, block if possible.
> > - *
> > - * Must be called with lru_lock and reservation held, this
> > function
> > - * will drop the lru lock and optionally the reservation lock
> > before returning.
> > - *
> > - * @bo:    The buffer object to clean-up
> > - * @interruptible: Any sleeps should occur interruptibly.
> > - * @no_wait_gpu:   Never wait for gpu. Return -EBUSY
> > instead.
> > - * @unlock_resv:   Unlock the reservation lock as well.
> > - */
> > -
> > -static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> > -      bool interruptible, bool
> > no_wait_gpu,
> > -      bool unlock_resv)
> > -{
> > -   struct dma_resv *resv = &bo->base._resv;
> > -   int ret;
> > -
> > -   if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP))
> > -   ret = 0;
> > -   else
> > -   ret = -EBUSY;
> > -
> > -   if (ret && !no_wait_gpu) {
> > -   long lret;
> > -
> > -   if (unlock_resv)
> > -   dma_resv_unlock(bo->base.resv);
> > -   spin_unlock(&bo->bdev->lru_lock);
> > -
> > -   lret = dma_resv_wait_timeout(resv,
> > DMA_RESV_USAGE_BOOKKEEP,
> > -    interruptible,
> > -    30 * HZ);
> > -
> > -   if (lret < 0)
> > -   return lret;
> > -   else if (lret == 0)
> > -   return -EBUSY;
> > -
> > -   spin_lock(&bo->bdev->lru_lock);
> > -   if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> > -   /*
> > -* We raced, and lost, someone else holds
> > the reservation now,
> > -* and is probably busy in
> > ttm_bo_cleanup_memtype_use.
> > -*
> > -* Even if it's not the case, because we
> > finished waiting any
> > -* delayed destruction would succeed, so
> > just return success
> > -* here.
> > -*/
> > -   spin_unlock(&bo->bdev->lru_lock);
> > -   return 0;
> > -   }
> > -   ret = 0;
> > -   }
> > -
> > -   if (ret) {
> > -   if (unlock_resv)
> > -   dma_resv_unlock(bo->base.resv);
> > -   spin_unlock(&bo->bdev->lru_lock);
> > -   return ret;
> > -   }
> > -
> > -   spin_unlock(&bo->bdev->lru_lock);
> > -   ttm_bo_cleanup_memtype_use(bo);
> > -
> > -   if (unlock_resv)
> > -   dma_resv_unlock(bo->base.resv);
> > -
> > -   return 0;
> > -}
> > -
> >  /*
> >   * Block for the dma_resv object to become idle, lock the buffer
> > and clean up
> >   * the resource and tt object.

Re: [PATCH v5 07/12] drm/ttm: Use the LRU walker for eviction

2024-06-24 Thread Thomas Hellström
Hi, Matthew

On Wed, 2024-06-19 at 22:52 +, Matthew Brost wrote:
> On Tue, Jun 18, 2024 at 09:18:15AM +0200, Thomas Hellström wrote:
> > Use the LRU walker for eviction. This helps
> > remove a lot of code with weird locking
> > semantics.
> > 
> > The functionality is slightly changed so that
> > when trylocked buffer objects are exhausted, we
> > continue to interleave walks with ticket-locks while
> > there is still progress made. The list walks are
> > not restarted in-between evictions.
> > 
> > Also provide a separate ttm_bo_evict_first()
> > function for its single user. The context of that
> > user allows sleeping dma_resv locks.
> > 
> I'm inclined to RB this as I think I've made sense of it all but just
> have a few questions / nits first + one small bug.
> 
> > Cc: Christian König 
> > Cc: Somalapuram Amaranath 
> > Cc: Matthew Brost 
> > Cc: 
> > Signed-off-by: Thomas Hellström 
> > ---
> >  drivers/gpu/drm/ttm/ttm_bo.c   | 350 -
> > 
> >  drivers/gpu/drm/ttm/ttm_resource.c |  20 +-
> >  include/drm/ttm/ttm_bo.h   |   8 +-
> >  3 files changed, 145 insertions(+), 233 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> > b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 63a91b77f7da..316afe19a325 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -224,80 +224,6 @@ static void ttm_bo_flush_all_fences(struct
> > ttm_buffer_object *bo)
> >     dma_resv_iter_end();
> >  }
> >  
> > -/**
> > - * ttm_bo_cleanup_refs
> > - * If bo idle, remove from lru lists, and unref.
> > - * If not idle, block if possible.
> > - *
> > - * Must be called with lru_lock and reservation held, this
> > function
> > - * will drop the lru lock and optionally the reservation lock
> > before returning.
> > - *
> > - * @bo:    The buffer object to clean-up
> > - * @interruptible: Any sleeps should occur interruptibly.
> > - * @no_wait_gpu:   Never wait for gpu. Return -EBUSY
> > instead.
> > - * @unlock_resv:   Unlock the reservation lock as well.
> > - */
> > -
> > -static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> > -      bool interruptible, bool
> > no_wait_gpu,
> > -      bool unlock_resv)
> > -{
> > -   struct dma_resv *resv = &bo->base._resv;
> > -   int ret;
> > -
> > -   if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP))
> > -   ret = 0;
> > -   else
> > -   ret = -EBUSY;
> > -
> > -   if (ret && !no_wait_gpu) {
> > -   long lret;
> > -
> > -   if (unlock_resv)
> > -   dma_resv_unlock(bo->base.resv);
> > -   spin_unlock(&bo->bdev->lru_lock);
> > -
> > -   lret = dma_resv_wait_timeout(resv,
> > DMA_RESV_USAGE_BOOKKEEP,
> > -    interruptible,
> > -    30 * HZ);
> > -
> > -   if (lret < 0)
> > -   return lret;
> > -   else if (lret == 0)
> > -   return -EBUSY;
> > -
> > -   spin_lock(&bo->bdev->lru_lock);
> > -   if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> > -   /*
> > -* We raced, and lost, someone else holds
> > the reservation now,
> > -* and is probably busy in
> > ttm_bo_cleanup_memtype_use.
> > -*
> > -* Even if it's not the case, because we
> > finished waiting any
> > -* delayed destruction would succeed, so
> > just return success
> > -* here.
> > -*/
> > -   spin_unlock(&bo->bdev->lru_lock);
> > -   return 0;
> > -   }
> > -   ret = 0;
> > -   }
> > -
> > -   if (ret) {
> > -   if (unlock_resv)
> > -   dma_resv_unlock(bo->base.resv);
> > -   spin_unlock(&bo->bdev->lru_lock);
> > -   return ret;
> > -   }
> > -
> > -   spin_unlock(&bo->bdev->lru_lock);
> > -   ttm_bo_cleanup_memtype_use(bo);
> > -
> > -   if (unlock_resv)
> > -   dma_resv_unlock(bo->base.resv);
> > -
> > -   return 0;
> > -}
> > -
>

[PATCH] drm/xe: Use write-back caching mode for system memory on DGFX

2024-06-19 Thread Thomas Hellström
The caching mode for buffer objects with VRAM as a possible
placement was forced to write-combined, regardless of placement.

However, write-combined system memory is expensive to allocate and
even though it is pooled, the pool is expensive to shrink, since
it involves global CPU TLB flushes.

Moreover write-combined system memory from TTM is only reliably
available on x86 and DGFX doesn't have an x86 restriction.

So regardless of the cpu caching mode selected for a bo,
internally use write-back caching mode for system memory on DGFX.

Coherency is maintained, but user-space clients may perceive a
difference in cpu access speeds.

Signed-off-by: Thomas Hellström 
Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode")
Cc: Pallavi Mishra 
Cc: Matthew Auld 
Cc: dri-devel@lists.freedesktop.org
Cc: Joonas Lahtinen 
Cc: Effie Yu 
Cc: Matthew Brost 
Cc: Maarten Lankhorst 
Cc: Jose Souza 
Cc: Michal Mrozek 
Cc:  # v6.8+
---
 drivers/gpu/drm/xe/xe_bo.c   | 47 +++-
 drivers/gpu/drm/xe/xe_bo_types.h |  3 +-
 include/uapi/drm/xe_drm.h|  8 +-
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 65c696966e96..31192d983d9e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -343,7 +343,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct 
ttm_buffer_object *ttm_bo,
struct xe_device *xe = xe_bo_device(bo);
struct xe_ttm_tt *tt;
unsigned long extra_pages;
-   enum ttm_caching caching;
+   enum ttm_caching caching = ttm_cached;
int err;
 
tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -357,26 +357,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct 
ttm_buffer_object *ttm_bo,
extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
   PAGE_SIZE);
 
-   switch (bo->cpu_caching) {
-   case DRM_XE_GEM_CPU_CACHING_WC:
-   caching = ttm_write_combined;
-   break;
-   default:
-   caching = ttm_cached;
-   break;
-   }
-
-   WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
-
/*
-* Display scanout is always non-coherent with the CPU cache.
-*
-* For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
-* require a CPU:WC mapping.
+* DGFX system memory is always WB / ttm_cached, since
+* other caching modes are only supported on x86. DGFX
+* GPU system memory accesses are always coherent with the
+* CPU.
 */
-   if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
-   (xe->info.graphics_verx100 >= 1270 && bo->flags & 
XE_BO_FLAG_PAGETABLE))
-   caching = ttm_write_combined;
+   if (!IS_DGFX(xe)) {
+   switch (bo->cpu_caching) {
+   case DRM_XE_GEM_CPU_CACHING_WC:
+   caching = ttm_write_combined;
+   break;
+   default:
+   caching = ttm_cached;
+   break;
+   }
+
+   WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
+
+   /*
+* Display scanout is always non-coherent with the CPU cache.
+*
+* For Xe_LPG and beyond, PPGTT PTE lookups are also
+* non-coherent and require a CPU:WC mapping.
+*/
+   if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
+   (xe->info.graphics_verx100 >= 1270 &&
+bo->flags & XE_BO_FLAG_PAGETABLE))
+   caching = ttm_write_combined;
+   }
 
if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
/*
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 86422e113d39..10450f1fbbde 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -66,7 +66,8 @@ struct xe_bo {
 
/**
 * @cpu_caching: CPU caching mode. Currently only used for userspace
-* objects.
+* objects. Exceptions are system memory on DGFX, which is always
+* WB.
 */
u16 cpu_caching;
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 93e00be44b2d..1189b3044723 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -783,7 +783,13 @@ struct drm_xe_gem_create {
 #define DRM_XE_GEM_CPU_CACHING_WC  2
/**
 * @cpu_caching: The CPU caching mode to select for this object. If
-* mmaping the object the mode selected here will also be used.
+* mmaping the object the mode selected here will also be used. The

Re: [PATCH v5 06/12] drm/ttm: Use the LRU walker helper for swapping

2024-06-19 Thread Thomas Hellström
On Wed, 2024-06-19 at 04:23 +, Matthew Brost wrote:
> On Tue, Jun 18, 2024 at 09:18:14AM +0200, Thomas Hellström wrote:
> > Rework the TTM swapping to use the LRU walker helper.
> > This helps fixing up the ttm_bo_swapout() interface
> > to be consistent about not requiring any locking.
> > 
> > For now mimic the current behaviour of using trylock
> > only. We could be using ticket-locks here but defer
> > that until it's deemed necessary. The TTM swapout
> > functionality is a bit weird anyway since it
> > alternates between memory types without exhausting
> > TTM_PL_SYSTEM first.
> > 
> > Cc: Christian König 
> > Cc: Somalapuram Amaranath 
> > Cc: Matthew Brost 
> > Cc: 
> > Signed-off-by: Thomas Hellström 
> > ---
> >  drivers/gpu/drm/ttm/ttm_bo.c | 112 +--
> > 
> >  drivers/gpu/drm/ttm/ttm_device.c |  30 ++---
> >  include/drm/ttm/ttm_bo.h |   5 +-
> >  3 files changed, 83 insertions(+), 64 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> > b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 43eda720657f..63a91b77f7da 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -1118,11 +1118,23 @@ int ttm_bo_wait_ctx(struct
> > ttm_buffer_object *bo, struct ttm_operation_ctx *ctx)
> >  }
> >  EXPORT_SYMBOL(ttm_bo_wait_ctx);
> >  
> > -int ttm_bo_swapout(struct ttm_buffer_object *bo, struct
> > ttm_operation_ctx *ctx,
> > -      gfp_t gfp_flags)
> > +/**
> > + * struct ttm_bo_swapout_walk - Parameters for the swapout walk
> > + */
> > +struct ttm_bo_swapout_walk {
> > +   /** @walk: The walk base parameters. */
> > +   struct ttm_lru_walk walk;
> > +   /** @gfp_flags: The gfp flags to use for ttm_tt_swapout()
> > */
> > +   gfp_t gfp_flags;
> > +};
> > +
> > +static long
> > +ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct
> > ttm_buffer_object *bo)
> >  {
> > -   struct ttm_place place;
> > -   bool locked;
> > +   struct ttm_place place = {.mem_type = bo->resource->mem_type};
> > +   struct ttm_bo_swapout_walk *swapout_walk =
> > +   container_of(walk, typeof(*swapout_walk), walk);
> > +   struct ttm_operation_ctx *ctx = walk->ctx;
> >     long ret;
> >  
> >     /*
> > @@ -1131,28 +1143,29 @@ int ttm_bo_swapout(struct ttm_buffer_object
> > *bo, struct ttm_operation_ctx *ctx,
> >  * The driver may use the fact that we're moving from
> > SYSTEM
> >  * as an indication that we're about to swap out.
> >  */
> > -   memset(&place, 0, sizeof(place));
> > -   place.mem_type = bo->resource->mem_type;
> > -   if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL))
> > -   return -EBUSY;
> > +   if (!bo->bdev->funcs->eviction_valuable(bo, &place)) {
> > +   ret = -EBUSY;
> > +   goto out;
> > +   }
> >  
> >     if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) ||
> >         bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL ||
> > -       bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED ||
> > -       !ttm_bo_get_unless_zero(bo)) {
> > -   if (locked)
> > -   dma_resv_unlock(bo->base.resv);
> > -   return -EBUSY;
> > +       bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED) {
> > +   ret = -EBUSY;
> 
> I think answers my -EBUSY question from here [1]. In these cases we
> continue LRU walk as eviction of the BO is not valuable.
> 
> [1]
> https://patchwork.freedesktop.org/patch/599606/?series=131815=5#comment_1091419
> 
> > +   goto out;
> >     }
> >  
> >     if (bo->deleted) {
> > -   ret = ttm_bo_cleanup_refs(bo, false, false, locked);
> > -   ttm_bo_put(bo);
> > -   return ret == -EBUSY ? -ENOSPC : ret;
> > -   }
> > +   pgoff_t num_pages = bo->ttm->num_pages;
> >  
> > -   /* TODO: Cleanup the locking */
> > -   spin_unlock(&bo->bdev->lru_lock);
> > +   ret = ttm_bo_wait_ctx(bo, ctx);
> > +   if (ret)
> > +   goto out;
> > +
> > +   ttm_bo_cleanup_memtype_use(bo);
> > +   ret = num_pages;
> > +   goto out;
> > +   }
> >  
> >     /*
> >  * Move to system cached
> > @@ -1164,12 +1177,13 @@ int ttm_bo_swapout(struct ttm_buffer_object
> > *bo, struct ttm_operation_ctx *ctx,
> >     m

Re: [PATCH v5 04/12] drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves

2024-06-19 Thread Thomas Hellström
On Wed, 2024-06-19 at 03:37 +, Matthew Brost wrote:
> On Tue, Jun 18, 2024 at 09:18:12AM +0200, Thomas Hellström wrote:
> 
> Ugh, replying to correct version again...
> 
> > To address the problem with hitches moving when bulk move
> > sublists are lru-bumped, register the list cursors with the
> > ttm_lru_bulk_move structure when traversing its list, and
> > when lru-bumping the list, move the cursor hitch to the tail.
> 
> - So the hitch moves to the tail (last) which points to the next item
> in
>   the LRU list
> - Then bulk is moved which is from first -> last to the end of the
> LRU
>   list
> - Now the hitch remains in the correct position in the list (i.e. it
>   didn't move with the bulk)
> 
> Did I get that right?

Yes, correct.
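
To make that concrete, here is a toy userspace model of the
interaction (plain C with hand-rolled list helpers; the names are
illustrative, not the TTM implementation). The hitch is first moved
past the sublist tail, so the subsequent sublist move leaves it
behind; resuming from the hitch then visits the remaining items and
finally the bumped sublist a second time:

#include <stdio.h>

struct node { struct node *prev, *next; const char *name; };

static void list_init(struct node *n) { n->prev = n->next = n; }

static void list_add_after(struct node *n, struct node *at)
{
        n->prev = at; n->next = at->next;
        at->next->prev = n; at->next = n;
}

static void list_del(struct node *n)
{
        n->prev->next = n->next; n->next->prev = n->prev;
        list_init(n);
}

int main(void)
{
        struct node lru, b1 = {.name = "B1"}, b2 = {.name = "B2"},
                    other = {.name = "other"}, hitch, *it;

        list_init(&lru);
        /* LRU: B1, B2 (a bulk sublist), then an unrelated item. */
        list_add_after(&b1, &lru);
        list_add_after(&b2, &b1);
        list_add_after(&other, &b2);

        /* The iterator stopped inside the bulk: hitch sits after B1. */
        list_add_after(&hitch, &b1);

        /* Bulk bump: first move the hitch past the sublist tail (B2)... */
        list_del(&hitch);
        list_add_after(&hitch, &b2);

        /* ...then move the sublist B1..B2 to the LRU tail. */
        list_del(&b1);
        list_del(&b2);
        list_add_after(&b1, lru.prev);
        list_add_after(&b2, &b1);

        /* Resuming after the hitch visits "other" and then the bumped
         * sublist once more - nothing is skipped. */
        for (it = hitch.next; it != &lru; it = it->next)
                printf("%s\n", it->name);       /* other, B1, B2 */
        return 0;
}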

> 
> > This also means it's mandatory for drivers to call
> > ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when
> > initializing and finalizing the bulk move structure, so add
> > those calls to the amdgpu- and xe driver.
> > 
> > Compared to v1 this is slightly more code but less fragile
> > and hopefully easier to understand.
> > 
> > Changes in previous series:
> > - Completely rework the functionality
> > - Avoid a NULL pointer dereference assigning manager->mem_type
> > - Remove some leftover code causing build problems
> > v2:
> > - For hitch bulk tail moves, store the mem_type in the cursor
> >   instead of with the manager.
> > v3:
> > - Remove leftover mem_type member from change in v2.
> > 
> > Cc: Christian König 
> > Cc: Somalapuram Amaranath 
> > Cc: Matthew Brost 
> > Cc: 
> > Signed-off-by: Thomas Hellström 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  4 ++
> >  drivers/gpu/drm/ttm/ttm_resource.c | 89
> > ++
> >  drivers/gpu/drm/xe/xe_vm.c |  4 ++
> >  include/drm/ttm/ttm_resource.h | 56 ++--
> >  4 files changed, 132 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index 3abfa66d72a2..97743993d711 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -2420,6 +2420,8 @@ int amdgpu_vm_init(struct amdgpu_device
> > *adev, struct amdgpu_vm *vm,
> >     if (r)
> >     return r;
> >  
> > +   ttm_lru_bulk_move_init(>lru_bulk_move);
> > +
> >     vm->is_compute_context = false;
> >  
> >     vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
> > @@ -2484,6 +2486,7 @@ int amdgpu_vm_init(struct amdgpu_device
> > *adev, struct amdgpu_vm *vm,
> >  error_free_delayed:
> >     dma_fence_put(vm->last_tlb_flush);
> >     dma_fence_put(vm->last_unlocked);
> > +   ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
> >     amdgpu_vm_fini_entities(vm);
> >  
> >     return r;
> > @@ -2640,6 +2643,7 @@ void amdgpu_vm_fini(struct amdgpu_device
> > *adev, struct amdgpu_vm *vm)
> >     }
> >     }
> >  
> > +   ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
> >  }
> >  
> >  /**
> > diff --git a/drivers/gpu/drm/ttm/ttm_resource.c
> > b/drivers/gpu/drm/ttm/ttm_resource.c
> > index 9c8b6499edfb..a03090683e79 100644
> > --- a/drivers/gpu/drm/ttm/ttm_resource.c
> > +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> > @@ -33,6 +33,49 @@
> >  
> >  #include 
> >  
> > +/* Detach the cursor from the bulk move list*/
> > +static void
> > +ttm_resource_cursor_clear_bulk(struct ttm_resource_cursor *cursor)
> > +{
> 
> A lockdep annotation wouldn't hurt here.

Will add.

> 
> > +   cursor->bulk = NULL;
> > +   list_del_init(&cursor->bulk_link);
> > +}
> > +
> > +/* Move the cursor to the end of the bulk move list it's in */
> > +static void ttm_resource_cursor_move_bulk_tail(struct
> > ttm_lru_bulk_move *bulk,
> > +      struct
> > ttm_resource_cursor *cursor)
> > +{
> > +   struct ttm_lru_bulk_move_pos *pos;
> > +
> 
> A lockdep annotation wouldn't hurt here too.

+1!

> 
> > +   if (WARN_ON_ONCE(bulk != cursor->bulk)) {
> > +   list_del_init(&cursor->bulk_link);
> > +   return;
> > +   }
> > +
> > +   pos = &bulk->pos[cursor->mem_type][cursor->priority];
> > +   if (pos)
> 
> 'if (pos->last)'?
> 
> As 'if (pos)' is go

Re: [PATCH v5 05/12] drm/ttm: Provide a generic LRU walker helper

2024-06-19 Thread Thomas Hellström
Hi, Matthew. 

Thanks for reviewing.

On Tue, 2024-06-18 at 22:11 +, Matthew Brost wrote:
> On Tue, Jun 18, 2024 at 09:18:13AM +0200, Thomas Hellström wrote:
> 
> Replying to correct version...
> 
> > Provide a generic LRU walker in TTM, in the spirit of
> > drm_gem_lru_scan()
> > but building on the restartable TTM LRU functionality.
> > 
> > The LRU walker optionally supports locking objects as part of
> > a ww mutex locking transaction, to mimic to some extent the
> > current functionality in ttm. However any -EDEADLK return
> > is converted to -ENOMEM, so that the driver will need to back
> > off and possibly retry without being able to keep the
> > ticket.
> > 
> 
> Wouldn't the backoff be unlock everything but keep the ticket?

We can't do that (yet) since we don't have the drm_exec or similar
functionality. The missing part is that if keep the ticket, it's in
contended state which means we need to slow-lock the contending lock as
the first lock. And the caller doesn't know which lock is the
contending one

That is all addressed in the RFC part of the series that I left out for
now. This is only trying to mimic current functionality.
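
To illustrate the missing piece, here is the canonical ww_mutex
-EDEADLK backoff from the kernel's ww_mutex documentation, adapted to
two objects (demo_obj/demo_lock_both are made-up names; a sketch
only, not the walker's code). It only works because the caller is
told which lock contended, which is exactly what the LRU walker
cannot report back today:

#include <linux/kernel.h>
#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(demo_ww_class);

struct demo_obj {
        struct ww_mutex lock;
};

/* @ctx must have been set up with ww_acquire_init(ctx, &demo_ww_class). */
static int demo_lock_both(struct demo_obj *o1, struct demo_obj *o2,
                          struct ww_acquire_ctx *ctx)
{
        struct demo_obj *objs[] = { o1, o2 };
        struct demo_obj *contended = NULL;
        int i, ret;

retry:
        for (i = 0; i < ARRAY_SIZE(objs); i++) {
                if (objs[i] == contended) {
                        /* Already slow-locked below; nothing to do. */
                        contended = NULL;
                        continue;
                }
                ret = ww_mutex_lock(&objs[i]->lock, ctx);
                if (ret) {
                        while (i--)
                                ww_mutex_unlock(&objs[i]->lock);
                        if (contended)
                                ww_mutex_unlock(&contended->lock);
                        if (ret == -EDEADLK) {
                                /* Keep the ticket: slow-lock the
                                 * contended lock first, then restart. */
                                contended = objs[i];
                                ww_mutex_lock_slow(&contended->lock, ctx);
                                goto retry;
                        }
                        return ret;
                }
        }
        ww_acquire_done(ctx);
        return 0;
}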

> 
> > v3:
> > - Move the helper to core ttm.
> > - Remove the drm_exec usage from it for now, it will be
> >   reintroduced later in the series.
> > v4:
> > - Handle the -EALREADY case if ticketlocking.
> > 
> > Cc: Christian König 
> > Cc: Somalapuram Amaranath 
> > Cc: Matthew Brost 
> > Cc: 
> > Signed-off-by: Thomas Hellström 
> > ---
> >  drivers/gpu/drm/ttm/ttm_bo_util.c | 145
> > ++
> >  include/drm/ttm/ttm_bo.h  |  32 +++
> >  2 files changed, 177 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c
> > b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > index 0b3f4267130c..45fcaf6f8644 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > @@ -768,3 +768,148 @@ int ttm_bo_pipeline_gutting(struct
> > ttm_buffer_object *bo)
> >     ttm_tt_destroy(bo->bdev, ttm);
> >     return ret;
> >  }
> > +
> > +static bool ttm_lru_walk_trylock(struct ttm_lru_walk *walk,
> > +struct ttm_buffer_object *bo,
> > +bool *needs_unlock)
> > +{
> > +   struct ttm_operation_ctx *ctx = walk->ctx;
> > +
> > +   *needs_unlock = false;
> > +
> > +   if (dma_resv_trylock(bo->base.resv)) {
> > +   *needs_unlock = true;
> > +   return true;
> > +   }
> > +
> > +   if (bo->base.resv == ctx->resv && ctx->allow_res_evict) {
> > +   dma_resv_assert_held(bo->base.resv);
> > +   return true;
> > +   }
> > +
> 
> Any reason this is done after the try lock? Just kinda goofy as if
> this
> statement is true the dma_resv_trylock will always fail.

It should work either way. I guess I had viewed it as "trylock first,
if that fails, attempt any exception". I guess if we want to optimize
performance for shared lock implementations, moving it first might
avoid the atomic trylock operation, but I wouldn't expect a noticeable
difference.

> 
> > +   return false;
> > +}
> > +
> > +static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
> > +      struct ttm_buffer_object *bo,
> > +      bool *needs_unlock)
> > +{
> > +   struct dma_resv *resv = bo->base.resv;
> > +   int ret;
> > +
> 
> I suppose we don't have asserts here like in Xe but if we did,
> assert(walk->ticket)?

I agree. I think we'd really want a TTM assert or warning that could be
compiled away. In any case, I only expect a single caller of this
function.

> 
> > +   if (walk->ctx->interruptible)
> > +   ret = dma_resv_lock_interruptible(resv, walk->ticket);
> > +   else
> > +   ret = dma_resv_lock(resv, walk->ticket);
> > +
> > +   if (!ret) {
> > +   *needs_unlock = true;
> > +   /* Only a single ticketlock per loop. */
> > +   walk->ticket = NULL;
> 
> Can you explain this a bit more? I see that once the walk->ticket is
> set
> to NULL this function will not be called again (i.e. only try locking
> will be used). I want to understand the reasoning for this.
> 
> It might be helpful for a more lengthly explaination in the comments
> of
> the code too.

I can add a more thorough explanation. Again, this is trying to mimic
the current code, that does a walk of trylo

[PATCH v5 11/12] drm/ttm, drm/xe: Add a shrinker for xe bos

2024-06-18 Thread Thomas Hellström
Rather than relying on the TTM watermark accounting add a shrinker
for xe_bos in TT or system memory.

Leverage the newly added TTM per-page shrinking and shmem backup
support.

Although xe doesn't fully support WONTNEED (purgeable) bos yet,
introduce and add shrinker support for purgeable ttm_tts.

v2:
- Cleanups bugfixes and a KUNIT shrinker test.
- Add writeback support, and activate if kswapd.
v3:
- Move the try_shrink() helper to core TTM.
- Minor cleanups.
v4:
- Add runtime pm for the shrinker. Shrinking may require an active
  device for CCS metadata copying.
v5:
- Separately purge ghost- and zombie objects in the shrinker.
- Fix a format specifier - type inconsistency. (Kernel test robot).

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c |  67 ++
 drivers/gpu/drm/xe/Makefile   |   1 +
 drivers/gpu/drm/xe/tests/xe_bo.c  | 118 +++
 drivers/gpu/drm/xe/tests/xe_bo_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_bo_test.h |   1 +
 drivers/gpu/drm/xe/xe_bo.c| 155 --
 drivers/gpu/drm/xe/xe_bo.h|  26 +++
 drivers/gpu/drm/xe/xe_device.c|   8 +
 drivers/gpu/drm/xe/xe_device_types.h  |   2 +
 drivers/gpu/drm/xe/xe_shrinker.c  | 287 ++
 drivers/gpu/drm/xe/xe_shrinker.h  |  18 ++
 include/drm/ttm/ttm_bo.h  |   3 +
 12 files changed, 671 insertions(+), 16 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_shrinker.c
 create mode 100644 drivers/gpu/drm/xe/xe_shrinker.h

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 45fcaf6f8644..44711f422459 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -913,3 +913,70 @@ long ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, 
struct ttm_device *bdev,
ttm_resource_cursor_fini(&cursor);
return sofar;
 }
+EXPORT_SYMBOL(ttm_lru_walk_for_evict);
+
+/**
+ * ttm_bo_try_shrink - LRU walk helper to shrink a ttm buffer object.
+ * @walk: The struct ttm_lru_walk that describes the walk.
+ * @bo: The buffer object.
+ * @purge: Whether to attempt to purge the bo content since it's no
+ * longer needed.
+ * @writeback: If !@purge, attempt to write out to persistent storage.
+ *
+ * The function uses the ttm_tt_backup() functionality to back up or
+ * purge a struct ttm_tt. If the bo is not in system, it's first
+ * moved there.
+ *
+ * Return: The number of pages shrunken or purged, or
+ * negative error code on failure.
+ */
+long ttm_bo_try_shrink(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo,
+  bool purge, bool writeback)
+{
+   static const struct ttm_place sys_placement_flags = {
+   .fpfn = 0,
+   .lpfn = 0,
+   .mem_type = TTM_PL_SYSTEM,
+   .flags = 0,
+   };
+   static struct ttm_placement sys_placement = {
+   .num_placement = 1,
+   .placement = &sys_placement_flags,
+   };
+   struct ttm_operation_ctx *ctx = walk->ctx;
+   struct ttm_tt *tt = bo->ttm;
+   long lret;
+
+   dma_resv_assert_held(bo->base.resv);
+
+   if (!tt || !ttm_tt_is_populated(tt))
+   return 0;
+
+   if (bo->resource->mem_type != TTM_PL_SYSTEM) {
+   int ret = ttm_bo_validate(bo, &sys_placement, ctx);
+
+   if (ret) {
+   if (ret == -EINTR || ret == -EDEADLK ||
+   ret == -ERESTARTSYS)
+   return ret;
+   return 0;
+   }
+   }
+
+   lret = ttm_bo_wait_ctx(bo, ctx);
+   if (lret < 0) {
+   if (lret == -ERESTARTSYS)
+   return lret;
+   return 0;
+   }
+
+   if (bo->deleted)
+   lret = ttm_tt_backup(bo->bdev, tt, true, writeback);
+   else
+   lret = ttm_tt_backup(bo->bdev, tt, purge, writeback);
+   if (lret < 0 && lret != -EINTR)
+   return 0;
+
+   return lret;
+}
+EXPORT_SYMBOL(ttm_bo_try_shrink);
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 1905a80e61e3..46304592d94d 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -107,6 +107,7 @@ xe-y += xe_bb.o \
xe_ring_ops.o \
xe_sa.o \
xe_sched_job.o \
+   xe_shrinker.o \
xe_step.o \
xe_sync.o \
xe_tile.o \
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 9f3c02826464..49617f16dc76 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -6,6 +6,8 @@
 #include 
 #include 
 
+#include 
+
 #include "tests/xe_bo_test.h"
 #include "tests/xe_pci_test.h"
 #include "tests/xe_test.h"
@@ -350,3 +352,119 @@ void xe_bo_evict_kunit(struc
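
The message is truncated above. For orientation, the registration
boilerplate a driver shrinker like this hangs off, using the
shrinker_alloc()/shrinker_register() API available since kernel 6.7,
looks roughly as follows; the count/scan callbacks are empty
stand-ins, not the xe implementation:

#include <linux/errno.h>
#include <linux/shrinker.h>

static unsigned long demo_count(struct shrinker *shrink,
                                struct shrink_control *sc)
{
        /* Estimate of reclaimable objects; 0 means "nothing to do". */
        return 0;
}

static unsigned long demo_scan(struct shrinker *shrink,
                               struct shrink_control *sc)
{
        /* Reclaim up to sc->nr_to_scan objects; SHRINK_STOP aborts. */
        return SHRINK_STOP;
}

static struct shrinker *demo_shrinker;

static int demo_shrinker_init(void *private)
{
        demo_shrinker = shrinker_alloc(0, "drm-demo");
        if (!demo_shrinker)
                return -ENOMEM;

        demo_shrinker->count_objects = demo_count;
        demo_shrinker->scan_objects = demo_scan;
        demo_shrinker->private_data = private;
        shrinker_register(demo_shrinker);
        return 0;
}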

[PATCH v5 08/12] drm/ttm: Add a virtual base class for graphics memory backup

2024-06-18 Thread Thomas Hellström
Initially intended for experimenting with different backup
solutions (shmem vs direct swap cache insertion), abstract
the backup destination using a virtual base class.

Also provide a sample implementation for shmem.

While one could perhaps skip the abstraction once a preferred
backup solution is settled on, this functionality may actually
come in handy for configurable dedicated graphics memory
backup to fast nvme files or similar, without affecting
swap-space. Could indeed be useful for VRAM backup on S4 and
other cases.

v5:
- Fix a UAF. (kernel test robot, Dan Carpenter)

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/Makefile   |   2 +-
 drivers/gpu/drm/ttm/ttm_backup_shmem.c | 139 +
 include/drm/ttm/ttm_backup.h   | 136 
 3 files changed, 276 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/ttm/ttm_backup_shmem.c
 create mode 100644 include/drm/ttm/ttm_backup.h

diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index dad298127226..5e980dd90e41 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -4,7 +4,7 @@
 
 ttm-y := ttm_tt.o ttm_bo.o ttm_bo_util.o ttm_bo_vm.o ttm_module.o \
ttm_execbuf_util.o ttm_range_manager.o ttm_resource.o ttm_pool.o \
-   ttm_device.o ttm_sys_manager.o
+   ttm_device.o ttm_sys_manager.o ttm_backup_shmem.o
 ttm-$(CONFIG_AGP) += ttm_agp_backend.o
 
 obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_backup_shmem.c 
b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
new file mode 100644
index ..f5bc47734d71
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include 
+#include 
+
+/**
+ * struct ttm_backup_shmem - A shmem based ttm_backup subclass.
+ * @backup: The base struct ttm_backup
+ * @filp: The associated shmem object
+ */
+struct ttm_backup_shmem {
+   struct ttm_backup backup;
+   struct file *filp;
+};
+
+static struct ttm_backup_shmem *to_backup_shmem(struct ttm_backup *backup)
+{
+   return container_of(backup, struct ttm_backup_shmem, backup);
+}
+
+static void ttm_backup_shmem_drop(struct ttm_backup *backup, unsigned long 
handle)
+{
+   handle -= 1;
+   shmem_truncate_range(file_inode(to_backup_shmem(backup)->filp), handle,
+handle + 1);
+}
+
+static int ttm_backup_shmem_copy_page(struct ttm_backup *backup, struct page 
*dst,
+ unsigned long handle, bool killable)
+{
+   struct file *filp = to_backup_shmem(backup)->filp;
+   struct address_space *mapping = filp->f_mapping;
+   struct folio *from_folio;
+
+   handle -= 1;
+   from_folio = shmem_read_folio(mapping, handle);
+   if (IS_ERR(from_folio))
+   return PTR_ERR(from_folio);
+
+   /* Note: Use drm_memcpy_from_wc? */
+   copy_highpage(dst, folio_file_page(from_folio, handle));
+   folio_put(from_folio);
+
+   return 0;
+}
+
+static unsigned long
+ttm_backup_shmem_backup_page(struct ttm_backup *backup, struct page *page,
+bool writeback, pgoff_t i, gfp_t page_gfp,
+gfp_t alloc_gfp)
+{
+   struct file *filp = to_backup_shmem(backup)->filp;
+   struct address_space *mapping = filp->f_mapping;
+   unsigned long handle = 0;
+   struct folio *to_folio;
+   int ret;
+
+   to_folio = shmem_read_folio_gfp(mapping, i, alloc_gfp);
+   if (IS_ERR(to_folio))
+   return handle;
+
+   folio_mark_accessed(to_folio);
+   folio_lock(to_folio);
+   folio_mark_dirty(to_folio);
+   copy_highpage(folio_file_page(to_folio, i), page);
+   handle = i + 1;
+
+   if (writeback && !folio_mapped(to_folio) && 
folio_clear_dirty_for_io(to_folio)) {
+   struct writeback_control wbc = {
+   .sync_mode = WB_SYNC_NONE,
+   .nr_to_write = SWAP_CLUSTER_MAX,
+   .range_start = 0,
+   .range_end = LLONG_MAX,
+   .for_reclaim = 1,
+   };
+   folio_set_reclaim(to_folio);
+   ret = mapping->a_ops->writepage(folio_page(to_folio, 0), &wbc);
+   if (!folio_test_writeback(to_folio))
+   folio_clear_reclaim(to_folio);
+   /* If writepage succeeds, it unlocks the folio */
+   if (ret)
+   folio_unlock(to_folio);
+   } else {
+   folio_unlock(to_folio);
+   }
+
+   folio_put(to_folio);
+
+   return handle;
+}
+
+static void ttm_backup_shmem_fini(struct ttm_backup *backup)
+{
+   struct ttm_backup_shmem *sbackup = to_backup_shmem(backup);
+
+   fput(
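
One detail worth calling out in the backend above: shmem offsets are
stored off-by-one (handle = i + 1 on backup, handle -= 1 on lookup)
so that a handle of 0 can mean "this page was never backed up". A
runnable userspace sketch of that encoding (illustrative names only):

#include <stdio.h>

#define NO_BACKUP 0UL

static unsigned long handle_from_index(unsigned long i) { return i + 1; }
static unsigned long index_from_handle(unsigned long h) { return h - 1; }

int main(void)
{
        unsigned long pages[4] = { NO_BACKUP, NO_BACKUP, NO_BACKUP, NO_BACKUP };
        unsigned long i;

        pages[2] = handle_from_index(2);  /* page 2 backed up at offset 2 */

        for (i = 0; i < 4; i++) {
                if (pages[i] == NO_BACKUP)
                        printf("page %lu: resident\n", i);
                else
                        printf("page %lu: backed up at offset %lu\n",
                               i, index_from_handle(pages[i]));
        }
        return 0;
}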

[PATCH v5 12/12] drm/xe: Increase the XE_PL_TT watermark

2024-06-18 Thread Thomas Hellström
The XE_PL_TT watermark was set to 50% of system memory.
The idea behind that was unclear since the net effect is that
TT memory will be evicted to TTM_PL_SYSTEM memory if that
watermark is exceeded, requiring PPGTT rebinds and dma
remapping. But there is no similar watermark for TTM_PL_SYSTEM
memory.

The TTM functionality that tries to swap out system memory to
shmem objects if a 50% limit of total system memory is reached
is orthogonal to this, and with the shrinker added, it's no
longer in effect.

Replace the 50% TTM_PL_TT limit with a 100% limit, in effect
allowing all graphics memory to be bound to the device unless it
has been swapped out by the shrinker.

Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c 
b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
index 9844a8edbfe1..d38b91872da3 100644
--- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -108,9 +108,8 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe)
u64 gtt_size;
 
si_meminfo(&si);
+   /* Potentially restrict amount of TT memory here. */
gtt_size = (u64)si.totalram * si.mem_unit;
-   /* TTM limits allocation of all TTM devices by 50% of system memory */
-   gtt_size /= 2;
 
man->use_tt = true;
man->func = _ttm_sys_mgr_func;
-- 
2.44.0
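
As an aside, the numbers involved are easy to inspect from userspace
with sysinfo(2), which exposes the same totalram/mem_unit pair that
si_meminfo() fills in. A runnable illustration of the old 50% limit
versus the new 100% one:

#include <stdio.h>
#include <sys/sysinfo.h>

int main(void)
{
        struct sysinfo si;
        unsigned long long total, old_limit;

        if (sysinfo(&si))
                return 1;

        total = (unsigned long long)si.totalram * si.mem_unit;
        old_limit = total / 2;  /* previous 50% TTM_PL_TT watermark */

        printf("TT manager size: old %llu bytes, new %llu bytes\n",
               old_limit, total);
        return 0;
}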



[PATCH v5 09/12] drm/ttm/pool: Provide a helper to shrink pages

2024-06-18 Thread Thomas Hellström
Provide a helper to shrink ttm_tt page-vectors on a per-page
basis. A ttm_backup backend could then in theory get away with
allocating a single temporary page for each struct ttm_tt.

This is accomplished by splitting larger pages before trying to
back them up.

In the future we could allow ttm_backup to handle backing up
large pages as well, but currently there's no benefit in
doing that, since the shmem backup backend would have to
split those anyway to avoid allocating too much temporary
memory, and if the backend instead inserts pages into the
swap-cache, those are split on reclaim by the core.

Due to potential backup- and recovery errors, allow partially swapped
out struct ttm_tt's, although they are marked as swapped out to stop
them from being swapped out a second time. More details in the
ttm_pool.c DOC section.

v2:
- A couple of cleanups and error fixes in ttm_pool_back_up_tt.
- s/back_up/backup/
- Add a writeback parameter to the exported interface.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_pool.c | 397 +++--
 drivers/gpu/drm/ttm/ttm_tt.c   |  37 +++
 include/drm/ttm/ttm_pool.h |   5 +
 include/drm/ttm/ttm_tt.h   |  20 ++
 4 files changed, 446 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 6e1fd6985ffc..38e50cf81b0a 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -41,6 +41,7 @@
 #include 
 #endif
 
+#include 
 #include 
 #include 
 #include 
@@ -58,6 +59,32 @@ struct ttm_pool_dma {
unsigned long vaddr;
 };
 
+/**
+ * struct ttm_pool_tt_restore - State representing restore from backup
+ * @alloced_pages: Total number of already allocated pages for the ttm_tt.
+ * @restored_pages: Number of (sub) pages restored from swap for this
+ *  chunk of 1 << @order pages.
+ * @first_page: The ttm page ptr representing @old_pages[0].
+ * @caching_divide: Page pointer where subsequent pages are cached.
+ * @old_pages: Backup copy of page pointers that were replaced by the new
+ *page allocation.
+ * @pool: The pool used for page allocation while restoring.
+ * @order: The order of the last page allocated while restoring.
+ *
+ * Recovery from backup might fail when we've recovered less than the
+ * full ttm_tt. In order not to lose any data (yet), keep information
+ * around that allows us to restart a failed ttm backup recovery.
+ */
+struct ttm_pool_tt_restore {
+   pgoff_t alloced_pages;
+   pgoff_t restored_pages;
+   struct page **first_page;
+   struct page **caching_divide;
+   struct ttm_pool *pool;
+   unsigned int order;
+   struct page *old_pages[];
+};
+
 static unsigned long page_pool_size;
 
 MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
@@ -354,11 +381,102 @@ static unsigned int ttm_pool_page_order(struct ttm_pool 
*pool, struct page *p)
return p->private;
 }
 
+/*
+ * To be able to insert single pages into backup directly,
+ * we need to split multi-order page allocations and make them look
+ * like single-page allocations.
+ */
+static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct page *p)
+{
+   unsigned int order = ttm_pool_page_order(pool, p);
+   pgoff_t nr;
+
+   if (!order)
+   return;
+
+   split_page(p, order);
+   nr = 1UL << order;
+   while (nr--)
+   (p++)->private = 0;
+}
+
+/**
+ * DOC: Partial backup and restoration of a struct ttm_tt.
+ *
+ * Swapout using ttm_backup::ops::backup_page() and swapin using
+ * ttm_backup::ops::copy_backed_up_page() may fail.
+ * The former most likely due to lack of swap-space or memory, the latter due
+ * to lack of memory or because of signal interruption during waits.
+ *
+ * Backup failure is easily handled by using a ttm_tt pages vector that holds
+ * both swap entries and page pointers. This has to be taken into account when
+ * restoring such a ttm_tt from backup, and when freeing it while backed up.
+ * When restoring, for simplicity, new pages are actually allocated from the
+ * pool and the contents of any old pages are copied in and then the old pages
+ * are released.
+ *
+ * For restoration failures, the struct ttm_pool_tt_restore holds sufficient 
state
+ * to be able to resume an interrupted restore, and that structure is freed 
once
+ * the restoration is complete. If the struct ttm_tt is destroyed while there
+ * is a valid struct ttm_pool_tt_restore attached, that is also properly taken
+ * care of.
+ */
+
+static bool ttm_pool_restore_valid(const struct ttm_pool_tt_restore *restore)
+{
+   return restore && restore->restored_pages < (1 << restore->order);
+}
+
+static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
+  struct ttm_ba

[PATCH v5 10/12] drm/ttm: Use fault-injection to test error paths

2024-06-18 Thread Thomas Hellström
Use fault-injection to test partial TTM swapout and interrupted swapin.
Return -EINTR for swapin to test the caller's ability to handle and
restart the swapin, and on swapout perform a partial swapout to test
the swapin and release_shrunken functionality.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/Kconfig| 10 ++
 drivers/gpu/drm/ttm/ttm_pool.c | 17 -
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 981f43d4ca8c..5f010ec31002 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -272,6 +272,16 @@ config DRM_GPUVM
  GPU-VM representation providing helpers to manage a GPUs virtual
  address space
 
+config DRM_TTM_BACKUP_FAULT_INJECT
+   bool "Enable fault injection during TTM backup"
+   depends on DRM_TTM
+   default n
+   help
+ Inject recoverable failures during TTM backup and recovery of
+ backed-up objects. For DRM driver developers only.
+
+ If in doubt, choose N.
+
 config DRM_BUDDY
tristate
depends on DRM
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 38e50cf81b0a..d32a1f2e5e50 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -431,6 +431,7 @@ static int ttm_pool_restore_tt(struct ttm_pool_tt_restore 
*restore,
   struct ttm_backup *backup,
   struct ttm_operation_ctx *ctx)
 {
+   static unsigned long __maybe_unused swappedin;
unsigned int i, nr = 1 << restore->order;
int ret = 0;
 
@@ -446,6 +447,13 @@ static int ttm_pool_restore_tt(struct ttm_pool_tt_restore 
*restore,
if (handle == 0)
continue;
 
+   if (IS_ENABLED(CONFIG_DRM_TTM_BACKUP_FAULT_INJECT) &&
+   ctx->interruptible &&
+   ++swappedin % 100 == 0) {
+   ret = -EINTR;
+   break;
+   }
+
ret = backup->ops->copy_backed_up_page
(backup, restore->first_page[i],
 handle, ctx->interruptible);
@@ -892,7 +900,14 @@ long ttm_pool_backup_tt(struct ttm_pool *pool, struct 
ttm_tt *ttm, bool purge,
 
alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN | 
__GFP_RETRY_MAYFAIL;
 
-   for (i = 0; i < ttm->num_pages; ++i) {
+   num_pages = ttm->num_pages;
+
+   /* Pretend doing fault injection by shrinking only half of the pages. */
+
+   if (IS_ENABLED(CONFIG_DRM_TTM_BACKUP_FAULT_INJECT))
+   num_pages = DIV_ROUND_UP(num_pages, 2);
+
+   for (i = 0; i < num_pages; ++i) {
page = ttm->pages[i];
if (unlikely(!page))
continue;
-- 
2.44.0
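
The injection scheme above boils down to a static counter that fails
every 100th interruptible swapin. A runnable userspace model of the
pattern (plain C, illustrative only):

#include <errno.h>
#include <stdio.h>

static int copy_page_injected(int interruptible)
{
        static unsigned long swappedin;

        if (interruptible && ++swappedin % 100 == 0)
                return -EINTR;  /* the caller must restart the swapin */
        return 0;
}

int main(void)
{
        int failures = 0, i;

        for (i = 0; i < 1000; i++)
                if (copy_page_injected(1))
                        failures++;

        printf("%d injected failures in 1000 swapins\n", failures); /* 10 */
        return 0;
}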



[PATCH v5 07/12] drm/ttm: Use the LRU walker for eviction

2024-06-18 Thread Thomas Hellström
Use the LRU walker for eviction. This helps
remove a lot of code with weird locking
semantics.

The functionality is slightly changed so that
when trylocked buffer objects are exhausted, we
continue to interleave walks with ticket-locks while
progress is still being made. The list walks are
not restarted in-between evictions.

Also provide a separate ttm_bo_evict_first()
function for its single user. The context of that
user allows sleeping dma_resv locks.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo.c   | 350 -
 drivers/gpu/drm/ttm/ttm_resource.c |  20 +-
 include/drm/ttm/ttm_bo.h   |   8 +-
 3 files changed, 145 insertions(+), 233 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 63a91b77f7da..316afe19a325 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -224,80 +224,6 @@ static void ttm_bo_flush_all_fences(struct 
ttm_buffer_object *bo)
dma_resv_iter_end(&cursor);
 }
 
-/**
- * ttm_bo_cleanup_refs
- * If bo idle, remove from lru lists, and unref.
- * If not idle, block if possible.
- *
- * Must be called with lru_lock and reservation held, this function
- * will drop the lru lock and optionally the reservation lock before returning.
- *
- * @bo:            The buffer object to clean-up
- * @interruptible: Any sleeps should occur interruptibly.
- * @no_wait_gpu:   Never wait for gpu. Return -EBUSY instead.
- * @unlock_resv:   Unlock the reservation lock as well.
- */
-
-static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
-  bool interruptible, bool no_wait_gpu,
-  bool unlock_resv)
-{
-   struct dma_resv *resv = &bo->base._resv;
-   int ret;
-
-   if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP))
-   ret = 0;
-   else
-   ret = -EBUSY;
-
-   if (ret && !no_wait_gpu) {
-   long lret;
-
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-   spin_unlock(&bo->bdev->lru_lock);
-
-   lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
-interruptible,
-30 * HZ);
-
-   if (lret < 0)
-   return lret;
-   else if (lret == 0)
-   return -EBUSY;
-
-   spin_lock(&bo->bdev->lru_lock);
-   if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
-   /*
-* We raced, and lost, someone else holds the reservation now,
-* and is probably busy in ttm_bo_cleanup_memtype_use.
-*
-* Even if it's not the case, because we finished waiting any
-* delayed destruction would succeed, so just return success
-* here.
-*/
-   spin_unlock(&bo->bdev->lru_lock);
-   return 0;
-   }
-   ret = 0;
-   }
-
-   if (ret) {
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-   spin_unlock(&bo->bdev->lru_lock);
-   return ret;
-   }
-
-   spin_unlock(&bo->bdev->lru_lock);
-   ttm_bo_cleanup_memtype_use(bo);
-
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-
-   return 0;
-}
-
 /*
  * Block for the dma_resv object to become idle, lock the buffer and clean up
  * the resource and tt object.
@@ -505,151 +431,154 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object 
*bo,
 }
 EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
-/*
- * Check the target bo is allowable to be evicted or swapout, including cases:
- *
- * a. if share same reservation object with ctx->resv, have assumption
- * reservation objects should already be locked, so not lock again and
- * return true directly when either the opreation allow_reserved_eviction
- * or the target bo already is in delayed free list;
+/**
+ * ttm_bo_evict_first() - Evict the first bo on the manager's LRU list.
+ * @bdev: The ttm device.
+ * @man: The manager whose bo to evict.
+ * @ctx: The TTM operation ctx governing the eviction.
  *
- * b. Otherwise, trylock it.
+ * Return: 0 if successful or the resource disappeared. Negative error code on 
error.
  */
-static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-  struct ttm_operation_ctx *ctx,
-  const struct ttm_place *place,
-  bool *locked, bool *busy)
+int ttm_bo_evict_first(struct ttm_device *bde

[PATCH v5 05/12] drm/ttm: Provide a generic LRU walker helper

2024-06-18 Thread Thomas Hellström
Provide a generic LRU walker in TTM, in the spirit of drm_gem_lru_scan()
but building on the restartable TTM LRU functionality.

The LRU walker optionally supports locking objects as part of
a ww mutex locking transaction, to mimic to some extent the
current functionality in ttm. However any -EDEADLK return
is converted to -ENOSPC, so that the driver will need to back
off and possibly retry without being able to keep the
ticket.

v3:
- Move the helper to core ttm.
- Remove the drm_exec usage from it for now, it will be
  reintroduced later in the series.
v4:
- Handle the -EALREADY case if ticketlocking.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 145 ++
 include/drm/ttm/ttm_bo.h  |  32 +++
 2 files changed, 177 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 0b3f4267130c..45fcaf6f8644 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -768,3 +768,148 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
ttm_tt_destroy(bo->bdev, ttm);
return ret;
 }
+
+static bool ttm_lru_walk_trylock(struct ttm_lru_walk *walk,
+struct ttm_buffer_object *bo,
+bool *needs_unlock)
+{
+   struct ttm_operation_ctx *ctx = walk->ctx;
+
+   *needs_unlock = false;
+
+   if (dma_resv_trylock(bo->base.resv)) {
+   *needs_unlock = true;
+   return true;
+   }
+
+   if (bo->base.resv == ctx->resv && ctx->allow_res_evict) {
+   dma_resv_assert_held(bo->base.resv);
+   return true;
+   }
+
+   return false;
+}
+
+static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
+  struct ttm_buffer_object *bo,
+  bool *needs_unlock)
+{
+   struct dma_resv *resv = bo->base.resv;
+   int ret;
+
+   if (walk->ctx->interruptible)
+   ret = dma_resv_lock_interruptible(resv, walk->ticket);
+   else
+   ret = dma_resv_lock(resv, walk->ticket);
+
+   if (!ret) {
+   *needs_unlock = true;
+   /* Only a single ticketlock per loop. */
+   walk->ticket = NULL;
+   } else if (ret == -EDEADLK) {
+   /* Caller needs to exit the ww transaction. */
+   ret = -ENOSPC;
+   }
+
+   return ret;
+}
+
+static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
+{
+   if (locked)
+   dma_resv_unlock(bo->base.resv);
+}
+
+/**
+ * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
+ * valid items.
+ * @walk: describe the walks and actions taken
+ * @bdev: The TTM device.
+ * @man: The struct ttm_resource manager whose LRU lists we're walking.
+ * @target: The end condition for the walk.
+ *
+ * The LRU lists of @man are walked, and for each struct ttm_resource
+ * encountered, the corresponding ttm_buffer_object is locked and a
+ * reference is taken on it, and the LRU lock is dropped. The LRU lock
+ * may be dropped before locking and, in that case, it's verified that
+ * the item actually remains on the LRU list after the lock, and that
+ * the buffer object didn't switch resource in between.
+ *
+ * With a locked object, the actions indicated by @walk->process_bo are
+ * performed, and after that, the bo is unlocked, the refcount dropped and the
+ * next struct ttm_resource is processed. Here, the walker relies on
+ * TTM's restartable LRU list implementation.
+ *
+ * Typically @walk->process_bo() would return the number of pages evicted,
+ * swapped or shrunken, so that when the total exceeds @target, or when the
+ * LRU list has been walked in full, iteration is terminated. It's also 
terminated
+ * on error. Note that the definition of @target is done by the caller, it
+ * could have a different meaning than the number of pages.
+ *
+ * Note that the way dma_resv individualization is done, locking needs to be 
done
+ * either with the LRU lock held (trylocking only) or with a reference on the
+ * object.
+ *
+ * Return: The progress made towards target or negative error code on error.
+ */
+long ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
+   struct ttm_resource_manager *man, long target)
+{
+   struct ttm_resource_cursor cursor;
+   struct ttm_resource *res;
+   long sofar = 0;
+   long lret;
+
+   spin_lock(&bdev->lru_lock);
+   ttm_resource_manager_for_each_res(man, &cursor, res) {
+   struct ttm_buffer_object *bo = res->bo;
+   bool bo_needs_unlock = false;
+   bool bo_locked = false;
+   int mem_type;
+
+   if (!bo || bo->resource != res)
+  
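
The message is cut off above. For a sense of how a driver consumes
this walker, a hypothetical subclass sketch (how process_bo is wired
into struct ttm_lru_walk, direct member or an ops table, is an
assumption here; see the header in the patch for the authoritative
layout):

#include <drm/ttm/ttm_bo.h>

struct demo_shrink_walk {
        struct ttm_lru_walk walk;
        unsigned long shrunk;
};

static long demo_process_bo(struct ttm_lru_walk *walk,
                            struct ttm_buffer_object *bo)
{
        struct demo_shrink_walk *dwalk =
                container_of(walk, typeof(*dwalk), walk);

        /* @bo is locked and referenced here. Do the eviction/swapout
         * work and return pages of progress, 0 to keep walking, or a
         * negative error to abort the walk. */
        dwalk->shrunk++;
        return 0;
}

A driver would then kick the walk with something like
lret = ttm_lru_walk_for_evict(&dwalk.walk, bdev, man, target);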

[PATCH v5 06/12] drm/ttm: Use the LRU walker helper for swapping

2024-06-18 Thread Thomas Hellström
Rework the TTM swapping to use the LRU walker helper.
This helps fix up the ttm_bo_swapout() interface
to be consistent about not requiring any locking.

For now mimic the current behaviour of using trylock
only. We could be using ticket-locks here but defer
that until it's deemed necessary. The TTM swapout
functionality is a bit weird anyway since it
alternates between memory types without exhausting
TTM_PL_SYSTEM first.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 112 +--
 drivers/gpu/drm/ttm/ttm_device.c |  30 ++---
 include/drm/ttm/ttm_bo.h |   5 +-
 3 files changed, 83 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 43eda720657f..63a91b77f7da 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1118,11 +1118,23 @@ int ttm_bo_wait_ctx(struct ttm_buffer_object *bo, 
struct ttm_operation_ctx *ctx)
 }
 EXPORT_SYMBOL(ttm_bo_wait_ctx);
 
-int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
-  gfp_t gfp_flags)
+/**
+ * struct ttm_bo_swapout_walk - Parameters for the swapout walk
+ */
+struct ttm_bo_swapout_walk {
+   /** @walk: The walk base parameters. */
+   struct ttm_lru_walk walk;
+   /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */
+   gfp_t gfp_flags;
+};
+
+static long
+ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
 {
-   struct ttm_place place;
-   bool locked;
+   struct ttm_place place = {.mem_type = bo->resource->mem_type};
+   struct ttm_bo_swapout_walk *swapout_walk =
+   container_of(walk, typeof(*swapout_walk), walk);
+   struct ttm_operation_ctx *ctx = walk->ctx;
long ret;
 
/*
@@ -1131,28 +1143,29 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct 
ttm_operation_ctx *ctx,
 * The driver may use the fact that we're moving from SYSTEM
 * as an indication that we're about to swap out.
 */
-   memset(&place, 0, sizeof(place));
-   place.mem_type = bo->resource->mem_type;
-   if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL))
-   return -EBUSY;
+   if (!bo->bdev->funcs->eviction_valuable(bo, &place)) {
+   ret = -EBUSY;
+   goto out;
+   }
 
if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) ||
bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL ||
-   bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED ||
-   !ttm_bo_get_unless_zero(bo)) {
-   if (locked)
-   dma_resv_unlock(bo->base.resv);
-   return -EBUSY;
+   bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED) {
+   ret = -EBUSY;
+   goto out;
}
 
if (bo->deleted) {
-   ret = ttm_bo_cleanup_refs(bo, false, false, locked);
-   ttm_bo_put(bo);
-   return ret == -EBUSY ? -ENOSPC : ret;
-   }
+   pgoff_t num_pages = bo->ttm->num_pages;
 
-   /* TODO: Cleanup the locking */
-   spin_unlock(&bo->bdev->lru_lock);
+   ret = ttm_bo_wait_ctx(bo, ctx);
+   if (ret)
+   goto out;
+
+   ttm_bo_cleanup_memtype_use(bo);
+   ret = num_pages;
+   goto out;
+   }
 
/*
 * Move to system cached
@@ -1164,12 +1177,13 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct 
ttm_operation_ctx *ctx,
memset(&hop, 0, sizeof(hop));
place.mem_type = TTM_PL_SYSTEM;
ret = ttm_resource_alloc(bo, &place, &evict_mem);
-   if (unlikely(ret))
+   if (ret)
goto out;
 
ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop);
-   if (unlikely(ret != 0)) {
-   WARN(ret == -EMULTIHOP, "Unexpected multihop in swaput - likely driver bug.\n");
+   if (ret) {
+   WARN(ret == -EMULTIHOP,
+"Unexpected multihop in swapout - likely driver bug.\n");
ttm_resource_free(bo, &evict_mem);
goto out;
}
@@ -1179,30 +1193,54 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct 
ttm_operation_ctx *ctx,
 * Make sure BO is idle.
 */
ret = ttm_bo_wait_ctx(bo, ctx);
-   if (unlikely(ret != 0))
+   if (ret)
goto out;
 
ttm_bo_unmap_virtual(bo);
-
-   /*
-* Swap out. Buffer will be swapped in again as soon as
-* anyone tries to access a ttm page.
-*/
if (bo->bdev->funcs->swap_notify)
bo->bdev->funcs->swap_notif
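
The swapout walk above leans on the standard container_of() embedding
idiom: only the base struct ttm_lru_walk pointer crosses the walker
API, and the callback recovers its containing struct. A runnable
userspace model of just that idiom (illustrative names):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct walk_base { int ctx; };

struct swapout_walk {
        struct walk_base walk;  /* embedded base class */
        unsigned int gfp_flags;
};

static void callback(struct walk_base *walk)
{
        struct swapout_walk *sw =
                container_of(walk, struct swapout_walk, walk);

        printf("gfp_flags = 0x%x\n", sw->gfp_flags);
}

int main(void)
{
        struct swapout_walk sw = { .gfp_flags = 0xd0 };

        callback(&sw.walk);     /* only the base pointer crosses the API */
        return 0;
}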

[PATCH v5 03/12] drm/ttm: Use LRU hitches

2024-06-18 Thread Thomas Hellström
Have iterators insert themselves into the list they are iterating
over using hitch list nodes. Since only the iterator owner
can remove these list nodes from the list, it's safe to unlock
the list and when continuing, use them as a starting point. Due to
the way LRU bumping works in TTM, newly added items will not be
missed, and bumped items will be iterated over a second time before
reaching the end of the list.

The exception is list with bulk move sublists. When bumping a
sublist, a hitch that is part of that sublist will also be moved
and we might miss items if restarting from it. This will be
addressed in a later patch.

Changes in previous series:
- Updated ttm_resource_cursor_fini() documentation.
v2:
- Don't reorder ttm_resource_manager_first() and _next().
  (Christian König).
- Use list_add instead of list_move
  (Christian König)
v3:
- Split into two patches, one cleanup, one new functionality
  (Christian König)
- use ttm_resource_cursor_fini_locked() instead of open-coding
  (Matthew Brost)

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
---
 drivers/gpu/drm/ttm/ttm_bo.c   |  1 +
 drivers/gpu/drm/ttm/ttm_device.c   |  9 +++--
 drivers/gpu/drm/ttm/ttm_resource.c | 56 +-
 include/drm/ttm/ttm_resource.h |  9 +++--
 4 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6396dece0db1..43eda720657f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -621,6 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
if (locked)
dma_resv_unlock(res->bo->base.resv);
}
+   ttm_resource_cursor_fini_locked(&cursor);
 
if (!bo) {
if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 09411978a13a..f9e9b1ec8c8a 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -170,12 +170,17 @@ int ttm_device_swapout(struct ttm_device *bdev, struct 
ttm_operation_ctx *ctx,
num_pages = PFN_UP(bo->base.size);
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
-   if (!ret)
+   if (!ret) {
+   ttm_resource_cursor_fini(&cursor);
return num_pages;
-   if (ret != -EBUSY)
+   }
+   if (ret != -EBUSY) {
+   ttm_resource_cursor_fini(&cursor);
return ret;
+   }
}
}
+   ttm_resource_cursor_fini_locked(&cursor);
spin_unlock(&bdev->lru_lock);
return 0;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 8bfbc0e8..9c8b6499edfb 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -33,6 +33,37 @@
 
 #include 
 
+/**
+ * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage
+ * @cursor: The struct ttm_resource_cursor to finalize.
+ *
+ * The function pulls the LRU list cursor off any lists it was previously
+ * attached to. Needs to be called with the LRU lock held. The function
+ * can be called multiple times after each other.
+ */
+void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor)
+{
+   lockdep_assert_held(&cursor->man->bdev->lru_lock);
+   list_del_init(&cursor->hitch.link);
+}
+
+/**
+ * ttm_resource_cursor_fini() - Finalize the LRU list cursor usage
+ * @cursor: The struct ttm_resource_cursor to finalize.
+ *
+ * The function pulls the LRU list cursor off any lists it was previously
+ * attached to. Needs to be called without the LRU list lock held. The
+ * function can be called multiple times after each other.
+ */
+void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor)
+{
+   spinlock_t *lru_lock = &cursor->man->bdev->lru_lock;
+
+   spin_lock(lru_lock);
+   ttm_resource_cursor_fini_locked(cursor);
+   spin_unlock(lru_lock);
+}
+
 /**
  * ttm_lru_bulk_move_init - initialize a bulk move structure
  * @bulk: the structure to init
@@ -485,12 +516,15 @@ void ttm_resource_manager_debug(struct 
ttm_resource_manager *man,
 EXPORT_SYMBOL(ttm_resource_manager_debug);
 
 /**
- * ttm_resource_manager_first
- *
+ * ttm_resource_manager_first() - Start iterating over the resources
+ * of a resource manager
  * @man: resource manager to iterate over
  * @cursor: cursor to record the position
  *
- * Returns the first resource from the resource manager.
+ * Initializes the cursor and starts iterating. When done iterating,
+ * the caller must explicitly call ttm_resource_cursor_fini().
+ *
+ * Return: The
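
For reference, the iteration pattern the hitch enables looks like
this when put together (a sketch modeled on the callers above;
demo_walk() and its body comment are illustrative, and error handling
is trimmed):

#include <drm/ttm/ttm_resource.h>

static void demo_walk(struct ttm_device *bdev,
                      struct ttm_resource_manager *man)
{
        struct ttm_resource_cursor cursor;
        struct ttm_resource *res;

        spin_lock(&bdev->lru_lock);
        ttm_resource_manager_for_each_res(man, &cursor, res) {
                /*
                 * Work done here may drop and retake bdev->lru_lock;
                 * the hitch keeps the walk position valid across that.
                 */
        }
        ttm_resource_cursor_fini_locked(&cursor);
        spin_unlock(&bdev->lru_lock);
}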

[PATCH v5 04/12] drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves

2024-06-18 Thread Thomas Hellström
To address the problem with hitches moving when bulk move
sublists are lru-bumped, register the list cursors with the
ttm_lru_bulk_move structure when traversing its list, and
when lru-bumping the list, move the cursor hitch to the tail.
This also means it's mandatory for drivers to call
ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when
initializing and finalizing the bulk move structure, so add
those calls to the amdgpu- and xe driver.

Compared to v1 this is slightly more code but less fragile
and hopefully easier to understand.

Changes in previous series:
- Completely rework the functionality
- Avoid a NULL pointer dereference assigning manager->mem_type
- Remove some leftover code causing build problems
v2:
- For hitch bulk tail moves, store the mem_type in the cursor
  instead of with the manager.
v3:
- Remove leftover mem_type member from change in v2.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  4 ++
 drivers/gpu/drm/ttm/ttm_resource.c | 89 ++
 drivers/gpu/drm/xe/xe_vm.c |  4 ++
 include/drm/ttm/ttm_resource.h | 56 ++--
 4 files changed, 132 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3abfa66d72a2..97743993d711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2420,6 +2420,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
if (r)
return r;
 
+   ttm_lru_bulk_move_init(&vm->lru_bulk_move);
+
vm->is_compute_context = false;
 
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@@ -2484,6 +2486,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
 error_free_delayed:
dma_fence_put(vm->last_tlb_flush);
dma_fence_put(vm->last_unlocked);
+   ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
amdgpu_vm_fini_entities(vm);
 
return r;
@@ -2640,6 +2643,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
}
}
 
+   ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
 }
 
 /**
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 9c8b6499edfb..a03090683e79 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -33,6 +33,49 @@
 
 #include 
 
+/* Detach the cursor from the bulk move list */
+static void
+ttm_resource_cursor_clear_bulk(struct ttm_resource_cursor *cursor)
+{
+   cursor->bulk = NULL;
+   list_del_init(&cursor->bulk_link);
+}
+
+/* Move the cursor to the end of the bulk move list it's in */
+static void ttm_resource_cursor_move_bulk_tail(struct ttm_lru_bulk_move *bulk,
+  struct ttm_resource_cursor *cursor)
+{
+   struct ttm_lru_bulk_move_pos *pos;
+
+   if (WARN_ON_ONCE(bulk != cursor->bulk)) {
+   list_del_init(&cursor->bulk_link);
+   return;
+   }
+
+   pos = &bulk->pos[cursor->mem_type][cursor->priority];
+   if (pos)
+   list_move(&cursor->hitch.link, &pos->last->lru.link);
+   ttm_resource_cursor_clear_bulk(cursor);
+}
+
+/* Move all cursors attached to a bulk move to its end */
+static void ttm_bulk_move_adjust_cursors(struct ttm_lru_bulk_move *bulk)
+{
+   struct ttm_resource_cursor *cursor, *next;
+
+   list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link)
+   ttm_resource_cursor_move_bulk_tail(bulk, cursor);
+}
+
+/* Remove a cursor from an empty bulk move list */
+static void ttm_bulk_move_drop_cursors(struct ttm_lru_bulk_move *bulk)
+{
+   struct ttm_resource_cursor *cursor, *next;
+
+   list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link)
+   ttm_resource_cursor_clear_bulk(cursor);
+}
+
 /**
  * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage
  * @cursor: The struct ttm_resource_cursor to finalize.
@@ -45,6 +88,7 @@ void ttm_resource_cursor_fini_locked(struct 
ttm_resource_cursor *cursor)
 {
lockdep_assert_held(&cursor->man->bdev->lru_lock);
list_del_init(&cursor->hitch.link);
+   ttm_resource_cursor_clear_bulk(cursor);
 }
 
 /**
@@ -73,9 +117,27 @@ void ttm_resource_cursor_fini(struct ttm_resource_cursor 
*cursor)
 void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk)
 {
memset(bulk, 0, sizeof(*bulk));
+   INIT_LIST_HEAD(&bulk->cursor_list);
 }
 EXPORT_SYMBOL(ttm_lru_bulk_move_init);
 
+/**
+ * ttm_lru_bulk_move_fini - finalize a bulk move structure
+ * @bdev: The struct ttm_device
+ * @bulk: the structure to finalize
+ *
+ * Sanity checks that bulk moves don't have any
+ * resources left and hence no cursors attached.
+ */
+void ttm_lru_bulk_move_

[PATCH v5 02/12] drm/ttm: Slightly clean up LRU list iteration

2024-06-18 Thread Thomas Hellström
To make the transition to using lru hitches easier,
simplify the ttm_resource_manager_next() interface to only take
the cursor and reuse ttm_resource_manager_next() functionality
from ttm_resource_manager_first().

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_resource.c | 48 +-
 include/drm/ttm/ttm_resource.h | 10 ---
 2 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index db9a7a3717c4..8bfbc0e8 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -496,50 +496,44 @@ struct ttm_resource *
 ttm_resource_manager_first(struct ttm_resource_manager *man,
   struct ttm_resource_cursor *cursor)
 {
-   struct ttm_lru_item *lru;
-
lockdep_assert_held(&man->bdev->lru_lock);
 
-   for (cursor->priority = 0; cursor->priority < TTM_MAX_BO_PRIORITY;
-++cursor->priority)
-   list_for_each_entry(lru, &man->lru[cursor->priority], link) {
-   if (ttm_lru_item_is_res(lru))
-   return ttm_lru_item_to_res(lru);
-   }
-
-   return NULL;
+   cursor->priority = 0;
+   cursor->man = man;
+   cursor->cur = >lru[cursor->priority];
+   return ttm_resource_manager_next(cursor);
 }
 
 /**
  * ttm_resource_manager_next
  *
- * @man: resource manager to iterate over
  * @cursor: cursor to record the position
- * @res: the current resource pointer
  *
- * Returns the next resource from the resource manager.
+ * Return: the next resource from the resource manager.
  */
 struct ttm_resource *
-ttm_resource_manager_next(struct ttm_resource_manager *man,
- struct ttm_resource_cursor *cursor,
- struct ttm_resource *res)
+ttm_resource_manager_next(struct ttm_resource_cursor *cursor)
 {
-   struct ttm_lru_item *lru = >lru;
+   struct ttm_resource_manager *man = cursor->man;
+   struct ttm_lru_item *lru;
 
lockdep_assert_held(>bdev->lru_lock);
 
-   list_for_each_entry_continue(lru, >lru[cursor->priority], link) {
-   if (ttm_lru_item_is_res(lru))
-   return ttm_lru_item_to_res(lru);
-   }
-
-   for (++cursor->priority; cursor->priority < TTM_MAX_BO_PRIORITY;
-++cursor->priority)
-   list_for_each_entry(lru, >lru[cursor->priority], link) {
-   if (ttm_lru_item_is_res(lru))
-   ttm_lru_item_to_res(lru);
+   for (;;) {
+   lru = list_entry(cursor->cur, typeof(*lru), link);
+   list_for_each_entry_continue(lru, >lru[cursor->priority], 
link) {
+   if (ttm_lru_item_is_res(lru)) {
+   cursor->cur = >link;
+   return ttm_lru_item_to_res(lru);
+   }
}
 
+   if (++cursor->priority >= TTM_MAX_BO_PRIORITY)
+   break;
+
+   cursor->cur = >lru[cursor->priority];
+   }
+
return NULL;
 }
 
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 1511d91e290d..7d81fd5b5b83 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -272,11 +272,15 @@ ttm_lru_item_to_res(struct ttm_lru_item *item)
 /**
  * struct ttm_resource_cursor
  *
+ * @man: The resource manager currently being iterated over.
+ * @cur: The list head the cursor currently points to.
  * @priority: the current priority
  *
  * Cursor to iterate over the resources in a manager.
  */
 struct ttm_resource_cursor {
+   struct ttm_resource_manager *man;
+   struct list_head *cur;
unsigned int priority;
 };
 
@@ -438,9 +442,7 @@ struct ttm_resource *
 ttm_resource_manager_first(struct ttm_resource_manager *man,
   struct ttm_resource_cursor *cursor);
 struct ttm_resource *
-ttm_resource_manager_next(struct ttm_resource_manager *man,
- struct ttm_resource_cursor *cursor,
- struct ttm_resource *res);
+ttm_resource_manager_next(struct ttm_resource_cursor *cursor);
 
 struct ttm_resource *
 ttm_lru_first_res_or_null(struct list_head *head);
@@ -455,7 +457,7 @@ ttm_lru_first_res_or_null(struct list_head *head);
  */
 #define ttm_resource_manager_for_each_res(man, cursor, res)\
for (res = ttm_resource_manager_first(man, cursor); res;\
-res = ttm_resource_manager_next(man, cursor, res))
+res = ttm_resource_manager_next(cursor))
 
 struct ttm_kmap_iter *
 ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io,
-- 
2.44.0



[PATCH v5 01/12] drm/ttm: Allow TTM LRU list nodes of different types

2024-06-18 Thread Thomas Hellström
To be able to handle list unlocking while traversing the LRU
list, we want the iterators not only to point to the next
position of the list traversal, but to insert themselves as
list nodes at that point to work around the fact that the
next node might otherwise disappear from the list while
the iterator is pointing to it.

These list nodes need to be easily distinguishable from other
list nodes so that others traversing the list can skip
over them.

So declare a struct ttm_lru_item, with a struct list_head member
and a type enum. This will slightly increase the size of a
struct ttm_resource.
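
For orientation, the node and the check used to skip non-resource
entries look roughly as follows (a sketch; TTM_LRU_RESOURCE is an
assumed constant name, while the enum and the helper match the
identifiers used later in the series):

        enum ttm_lru_item_type {
                TTM_LRU_RESOURCE,
                TTM_LRU_HITCH
        };

        struct ttm_lru_item {
                struct list_head link;
                enum ttm_lru_item_type type;
        };

        static inline bool ttm_lru_item_is_res(struct ttm_lru_item *item)
        {
                return item->type == TTM_LRU_RESOURCE;
        }

List traversals then simply skip any entry for which
ttm_lru_item_is_res() returns false.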

Changes in previous series:
- Update enum ttm_lru_item_type documentation.
v3:
- Introduce ttm_lru_first_res_or_null()
  (Christian König, Thomas Hellström)
v5:
- Update also the TTM test code (Xe CI).

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/tests/ttm_bo_test.c   |  6 +-
 drivers/gpu/drm/ttm/tests/ttm_resource_test.c |  2 +-
 drivers/gpu/drm/ttm/ttm_device.c  |  4 +-
 drivers/gpu/drm/ttm/ttm_resource.c| 89 +++
 include/drm/ttm/ttm_resource.h| 54 ++-
 5 files changed, 129 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c 
b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
index 1f8a4f8adc92..ddbb895feaa2 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
@@ -265,7 +265,7 @@ static void ttm_bo_unreserve_basic(struct kunit *test)
 
man = ttm_manager_type(priv->ttm_dev, mem_type);
KUNIT_ASSERT_EQ(test,
-   list_is_last(&res1->lru, &man->lru[bo->priority]), 1);
+   list_is_last(&res1->lru.link, &man->lru[bo->priority]), 1);
 
 ttm_resource_free(bo, &res1);
 ttm_resource_free(bo, &res2);
@@ -302,11 +302,11 @@ static void ttm_bo_unreserve_pinned(struct kunit *test)
 err = ttm_resource_alloc(bo, place, &res1);
 KUNIT_ASSERT_EQ(test, err, 0);
 KUNIT_ASSERT_EQ(test,
-   list_is_last(&res1->lru, &priv->ttm_dev->pinned), 1);
+   list_is_last(&res1->lru.link, &priv->ttm_dev->pinned), 1);
 
 ttm_bo_unreserve(bo);
 KUNIT_ASSERT_EQ(test,
-   list_is_last(&res1->lru, &priv->ttm_dev->pinned), 1);
+   list_is_last(&res1->lru.link, &priv->ttm_dev->pinned), 1);
 
 ttm_resource_free(bo, &res1);
 ttm_resource_free(bo, &res2);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c 
b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
index 029e1f094bb0..681ac8b746ef 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
@@ -198,7 +198,7 @@ static void ttm_resource_fini_basic(struct kunit *test)
ttm_resource_init(bo, place, res);
ttm_resource_fini(man, res);
 
-   KUNIT_ASSERT_TRUE(test, list_empty(&res->lru));
+   KUNIT_ASSERT_TRUE(test, list_empty(&res->lru.link));
KUNIT_ASSERT_EQ(test, man->usage, 0);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 434cf0258000..09411978a13a 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -274,14 +274,14 @@ static void ttm_device_clear_lru_dma_mappings(struct 
ttm_device *bdev,
struct ttm_resource *res;
 
 spin_lock(&bdev->lru_lock);
-   while ((res = list_first_entry_or_null(list, typeof(*res), lru))) {
+   while ((res = ttm_lru_first_res_or_null(list))) {
struct ttm_buffer_object *bo = res->bo;
 
/* Take ref against racing releases once lru_lock is unlocked */
if (!ttm_bo_get_unless_zero(bo))
continue;
 
-   list_del_init(&res->lru);
+   list_del_init(&bo->resource->lru.link);
 spin_unlock(&bdev->lru_lock);
 
if (bo->ttm)
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 4a66b851b67d..db9a7a3717c4 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -70,8 +70,8 @@ void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
dma_resv_assert_held(pos->last->bo->base.resv);
 
man = ttm_manager_type(pos->first->bo->bdev, i);
-   list_bulk_move_tail(&man->lru[j], &pos->first->lru,
-   &pos->last->lru);
+   list_bulk_move_tail(&man->lru[j], &pos->first->lru.link,
+   &pos->last->lru.link);
}
}
 }
@@ -84,14 +84,38 @@ ttm_lru_bulk_move_pos(struct ttm_lru_bulk_move *bulk, struct ttm_resource *res)
 return &bulk->pos[res->mem_type][res->bo->priority];

[PATCH v5 00/12] TTM shrinker helpers and xe buffer object shrinker

2024-06-18 Thread Thomas Hellström
This series implements TTM shrinker / eviction helpers and an xe bo
shrinker. It builds on two previous series, *and obsoletes these*. First

https://www.mail-archive.com/dri-devel@lists.freedesktop.org/msg484425.html

Second the previous TTM shrinker series

https://lore.kernel.org/linux-mm/b7491378-defd-4f1c-31e2-29e4c77e2...@amd.com/T/

Where the comment about layering
https://lore.kernel.org/linux-mm/b7491378-defd-4f1c-31e2-29e4c77e2...@amd.com/T/#ma918844aa8a6efe8768fdcda0c6590d5c93850c9

is now addressed, and this version also implements shmem objects for backup
rather than the direct swap-cache insertions used in the previous
series. It turns out that with per-page backup / shrinking, shmem objects
appear to work just as well as direct swap-cache insertions, with the
added benefit that the machinery introduced in the previous TTM shrinker
series to avoid running out of swap entries isn't really needed.

Patches 1-4 implement restartable LRU list iteration.

Patch 5 implements a LRU walker + resv locking helper

Patch 6 moves TTM swapping over to the walker.

Patch 7 moves TTM eviction over to the walker.

Patch 8 could in theory be skipped, but introduces a possibility to easily
add or test multiple backup backends, like the direct swap-cache
insertion or even files on fast dedicated nvme storage, for example.

Patch 9 introduces helpers in the ttm_pool code for page-by-page shrinking
and recovery. It avoids having to temporarily allocate a huge amount of
memory to be able to shrink a buffer object. It also introduces the
possibility to immediately write-back pages if needed, since that tends
to be a bit delayed when left to kswapd.

Patch 10 Adds a simple error injection to the above code to help increase
test coverage.

Patch 11 Implements an xe bo shrinker and a common helper in TTM for
shrinking.

Patch 12 increases (in effect removes) the XE_PL_TT watermark.

Earlier revisions carried additional RFC patches introducing drm_exec
locking in TTM, touching both drm_exec and dma-buf; those were dropped
in v4 (see below) since some drm_exec work still needs to be agreed on
and implemented.

v2:
- Squash obsolete revision history in the patch commit messages.
- Fix a couple of review comments by Christian
- Don't store the mem_type in the TTM managers but in the
  resource cursor.
- Rename introduced TTM *back_up* function names to *backup*
- Add ttm pool recovery fault injection.
- Shrinker xe kunit test
- Various bugfixes

v3:
- Address some review comments from Matthew Brost and Christian König.
- Use the restartable LRU walk for TTM swapping and eviction.
- Provide a POC drm_exec locking implementation for exhaustive
  eviction. (Christian König).

v4:
- Remove the RFC exhaustive eviction part. While the path to exhaustive
  eviction is pretty clear and demonstrated in v3, there is still some
  drm_exec work that needs to be agreed and implemented.
- Add shrinker power management. On some hw we need to wake when shrinking.
- Fix the lru walker helper for -EALREADY errors.
- Add drm/xe: Increase the XE_PL_TT watermark.

v5:
- Update also TTM kunit tests
- Handle ghost- and zombie objects in the shrinker.
- A couple of compile- and UAF fixes reported by Kernel Build Robot and
  Dan Carpenter.

Cc: Somalapuram Amaranath 
Cc: Christian König 
Cc: Matthew Brost 
Cc: 

Thomas Hellström (12):
  drm/ttm: Allow TTM LRU list nodes of different types
  drm/ttm: Slightly clean up LRU list iteration
  drm/ttm: Use LRU hitches
  drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist
moves
  drm/ttm: Provide a generic LRU walker helper
  drm/ttm: Use the LRU walker helper for swapping
  drm/ttm: Use the LRU walker for eviction
  drm/ttm: Add a virtual base class for graphics memory backup
  drm/ttm/pool: Provide a helper to shrink pages
  drm/ttm: Use fault-injection to test error paths
  drm/ttm, drm/xe: Add a shrinker for xe bos
  drm/xe: Increase the XE_PL_TT watermark

 drivers/gpu/drm/Kconfig   |  10 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   4 +
 drivers/gpu/drm/ttm/Makefile  |   2 +-
 drivers/gpu/drm/ttm/tests/ttm_bo_test.c   |   6 +-
 drivers/gpu/drm/ttm/tests/ttm_resource_test.c |   2 +-
 drivers/gpu/drm/ttm/ttm_backup_shmem.c| 139 ++
 drivers/gpu/drm/ttm/ttm_bo.c  | 463 --
 drivers/gpu/drm/ttm/ttm_bo_util.c | 212 
 drivers/gpu/drm/ttm/ttm_device.c  |  29 +-
 drivers/gpu/drm/ttm/ttm_pool.c| 412 +++-
 drivers/gpu/drm/ttm/ttm_resource.c| 264 --
 drivers/gpu/drm/ttm/ttm_tt.c  |  37 ++
 drivers/gpu/drm/xe/Makefile   |   1 +
 drivers/gpu/drm/xe/tests/xe_bo.c  | 118 +
 drivers/gpu/drm/xe/tests/xe_bo_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_bo_test.h |   1 +
 drivers/gpu/drm/xe/xe_bo.c| 155 +-
 drivers/gpu/drm/xe/xe_bo.h|  26 +
 drivers/gpu/drm/xe/xe_device.c|   8

[PATCH v4 11/12] drm/ttm, drm/xe: Add a shrinker for xe bos

2024-06-14 Thread Thomas Hellström
Rather than relying on the TTM watermark accounting, add a shrinker
for xe_bos in TT or system memory.

Leverage the newly added TTM per-page shrinking and shmem backup
support.

Although xe doesn't fully support WONTNEED (purgeable) bos yet,
introduce and add shrinker support for purgeable ttm_tts.
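
As a rough sketch of how the pieces fit (the wrapper struct and
callback names are illustrative, not part of the patch), the shrinker's
scan path drives the LRU walker with a callback that hands each buffer
object to the new ttm_bo_try_shrink() helper:

        struct bo_shrink_walk {
                struct ttm_lru_walk walk;
                bool purge;
                bool writeback;
        };

        static long bo_shrink_process_bo(struct ttm_lru_walk *walk,
                                         struct ttm_buffer_object *bo)
        {
                struct bo_shrink_walk *shrink_walk =
                        container_of(walk, typeof(*shrink_walk), walk);

                /* Returns pages shrunken or purged, or a negative error. */
                return ttm_bo_try_shrink(walk, bo, shrink_walk->purge,
                                         shrink_walk->writeback);
        }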

v2:
- Cleanups bugfixes and a KUNIT shrinker test.
- Add writeback support, and activate if kswapd.
v3:
- Move the try_shrink() helper to core TTM.
- Minor cleanups.
v4:
- Add runtime pm for the shrinker. Shrinking may require an active
  device for CCS metadata copying.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c |  67 ++
 drivers/gpu/drm/xe/Makefile   |   1 +
 drivers/gpu/drm/xe/tests/xe_bo.c  | 118 +++
 drivers/gpu/drm/xe/tests/xe_bo_test.c |   1 +
 drivers/gpu/drm/xe/tests/xe_bo_test.h |   1 +
 drivers/gpu/drm/xe/xe_bo.c| 139 +++--
 drivers/gpu/drm/xe/xe_bo.h|   4 +
 drivers/gpu/drm/xe/xe_device.c|   8 +
 drivers/gpu/drm/xe/xe_device_types.h  |   2 +
 drivers/gpu/drm/xe/xe_shrinker.c  | 287 ++
 drivers/gpu/drm/xe/xe_shrinker.h  |  18 ++
 include/drm/ttm/ttm_bo.h  |   3 +
 12 files changed, 633 insertions(+), 16 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_shrinker.c
 create mode 100644 drivers/gpu/drm/xe/xe_shrinker.h

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 45fcaf6f8644..44711f422459 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -913,3 +913,70 @@ long ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, 
struct ttm_device *bdev,
 ttm_resource_cursor_fini(&cursor);
return sofar;
 }
+EXPORT_SYMBOL(ttm_lru_walk_for_evict);
+
+/**
+ * ttm_bo_try_shrink - LRU walk helper to shrink a ttm buffer object.
+ * @walk: The struct ttm_lru_walk that describes the walk.
+ * @bo: The buffer object.
+ * @purge: Whether to attempt to purge the bo content since it's no
+ * longer needed.
+ * @writeback: If !@purge, attempt to write out to persistent storage.
+ *
+ * The function uses the ttm_tt_backup functionality to back up or
+ * purge a struct ttm_tt. If the bo is not in system, it's first
+ * moved there.
+ *
+ * Return: The number of pages shrunken or purged, or
+ * negative error code on failure.
+ */
+long ttm_bo_try_shrink(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo,
+  bool purge, bool writeback)
+{
+   static const struct ttm_place sys_placement_flags = {
+   .fpfn = 0,
+   .lpfn = 0,
+   .mem_type = TTM_PL_SYSTEM,
+   .flags = 0,
+   };
+   static struct ttm_placement sys_placement = {
+   .num_placement = 1,
+   .placement = _placement_flags,
+   };
+   struct ttm_operation_ctx *ctx = walk->ctx;
+   struct ttm_tt *tt = bo->ttm;
+   long lret;
+
+   dma_resv_assert_held(bo->base.resv);
+
+   if (!tt || !ttm_tt_is_populated(tt))
+   return 0;
+
+   if (bo->resource->mem_type != TTM_PL_SYSTEM) {
+   int ret = ttm_bo_validate(bo, _placement, ctx);
+
+   if (ret) {
+   if (ret == -EINTR || ret == -EDEADLK ||
+   ret == -ERESTARTSYS)
+   return ret;
+   return 0;
+   }
+   }
+
+   lret = ttm_bo_wait_ctx(bo, ctx);
+   if (lret < 0) {
+   if (lret == -ERESTARTSYS)
+   return lret;
+   return 0;
+   }
+
+   if (bo->deleted)
+   lret = ttm_tt_backup(bo->bdev, tt, true, writeback);
+   else
+   lret = ttm_tt_backup(bo->bdev, tt, purge, writeback);
+   if (lret < 0 && lret != -EINTR)
+   return 0;
+
+   return lret;
+}
+EXPORT_SYMBOL(ttm_bo_try_shrink);
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 1905a80e61e3..46304592d94d 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -107,6 +107,7 @@ xe-y += xe_bb.o \
xe_ring_ops.o \
xe_sa.o \
xe_sched_job.o \
+   xe_shrinker.o \
xe_step.o \
xe_sync.o \
xe_tile.o \
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 9f3c02826464..7576d362020f 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -6,6 +6,8 @@
 #include 
 #include 
 
+#include 
+
 #include "tests/xe_bo_test.h"
 #include "tests/xe_pci_test.h"
 #include "tests/xe_test.h"
@@ -350,3 +352,119 @@ void xe_bo_evict_kunit(struct kunit *test)
xe_call_for_each_device(evict_test_run_device);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);
+
+struct xe_bo_link {
+  

[PATCH v4 12/12] drm/xe: Increase the XE_PL_TT watermark

2024-06-14 Thread Thomas Hellström
The XE_PL_TT watermark was set to 50% of system memory.
The idea behind that was unclear since the net effect is that
TT memory will be evicted to TTM_PL_SYSTEM memory if that
watermark is exceeded, requiring PPGTT rebinds and dma
remapping. But there is no similar watermark for TTM_PL_SYSTEM
memory.

The TTM functionality that tries to swap out system memory to
shmem objects if a 50% limit of total system memory is reached
is orthogonal to this, and with the shrinker added, it's no
longer in effect.

Replace the 50% TTM_PL_TT limit with a 100% limit, in effect
allowing all graphics memory to be bound to the device unless it
has been swapped out by the shrinker.

Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c 
b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
index 9844a8edbfe1..d38b91872da3 100644
--- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -108,9 +108,8 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe)
u64 gtt_size;
 
 si_meminfo(&si);
+   /* Potentially restrict amount of TT memory here. */
gtt_size = (u64)si.totalram * si.mem_unit;
-   /* TTM limits allocation of all TTM devices by 50% of system memory */
-   gtt_size /= 2;
 
man->use_tt = true;
man->func = _ttm_sys_mgr_func;
-- 
2.44.0



[PATCH v4 09/12] drm/ttm/pool: Provide a helper to shrink pages

2024-06-14 Thread Thomas Hellström
Provide a helper to shrink ttm_tt page-vectors on a per-page
basis. A ttm_backup backend could then in theory get away with
allocating a single temporary page for each struct ttm_tt.

This is accomplished by splitting larger pages before trying to
back them up.

In the future we could allow ttm_backup to handle backing up
large pages as well, but currently there's no benefit in
doing that, since the shmem backup backend would have to
split those anyway to avoid allocating too much temporary
memory, and if the backend instead inserts pages into the
swap-cache, those are split on reclaim by the core.

Due to potential backup- and recovery errors, allow partially swapped-out
struct ttm_tt's, although mark them as swapped out, stopping them
from being swapped out a second time. More details in the ttm_pool.c
DOC section.
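
Conceptually, the backup loop proceeds page by page, along the lines of
this sketch (error unwinding and caching transitions, which the real
code handles, are elided; a handle of 0 denotes a failed backup):

        pgoff_t i;
        unsigned long handle;

        for (i = 0; i < ttm->num_pages; ++i) {
                struct page *page = ttm->pages[i];

                if (!page)
                        continue;

                /* Make a multi-order allocation look like single pages. */
                ttm_pool_split_for_swap(pool, page);

                handle = backup->ops->backup_page(backup, page, writeback,
                                                  i, page_gfp, alloc_gfp);
                if (!handle)
                        break; /* Leave the tt partially backed up. */
        }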

v2:
- A couple of cleanups and error fixes in ttm_pool_back_up_tt.
- s/back_up/backup/
- Add a writeback parameter to the exported interface.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_pool.c | 397 +++--
 drivers/gpu/drm/ttm/ttm_tt.c   |  37 +++
 include/drm/ttm/ttm_pool.h |   5 +
 include/drm/ttm/ttm_tt.h   |  20 ++
 4 files changed, 446 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 6e1fd6985ffc..38e50cf81b0a 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -41,6 +41,7 @@
 #include 
 #endif
 
+#include 
 #include 
 #include 
 #include 
@@ -58,6 +59,32 @@ struct ttm_pool_dma {
unsigned long vaddr;
 };
 
+/**
+ * struct ttm_pool_tt_restore - State representing restore from backup
+ * @alloced_pages: Total number of already allocated pages for the ttm_tt.
+ * @restored_pages: Number of (sub) pages restored from swap for this
+ *  chunk of 1 << @order pages.
+ * @first_page: The ttm page ptr representing @old_pages[0].
+ * @caching_divide: Page pointer where subsequent pages are cached.
+ * @old_pages: Backup copy of page pointers that were replaced by the new
+ *page allocation.
+ * @pool: The pool used for page allocation while restoring.
+ * @order: The order of the last page allocated while restoring.
+ *
+ * Recovery from backup might fail when we've recovered less than the
+ * full ttm_tt. In order not to lose any data (yet), keep information
+ * around that allows us to restart a failed ttm backup recovery.
+ */
+struct ttm_pool_tt_restore {
+   pgoff_t alloced_pages;
+   pgoff_t restored_pages;
+   struct page **first_page;
+   struct page **caching_divide;
+   struct ttm_pool *pool;
+   unsigned int order;
+   struct page *old_pages[];
+};
+
 static unsigned long page_pool_size;
 
 MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
@@ -354,11 +381,102 @@ static unsigned int ttm_pool_page_order(struct ttm_pool 
*pool, struct page *p)
return p->private;
 }
 
+/*
+ * To be able to insert single pages into backup directly,
+ * we need to split multi-order page allocations and make them look
+ * like single-page allocations.
+ */
+static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct page *p)
+{
+   unsigned int order = ttm_pool_page_order(pool, p);
+   pgoff_t nr;
+
+   if (!order)
+   return;
+
+   split_page(p, order);
+   nr = 1UL << order;
+   while (nr--)
+   (p++)->private = 0;
+}
+
+/**
+ * DOC: Partial backup and restoration of a struct ttm_tt.
+ *
+ * Swapout using ttm_backup::ops::backup_page() and swapin using
+ * ttm_backup::ops::copy_backed_up_page() may fail.
+ * The former most likely due to lack of swap-space or memory, the latter due
+ * to lack of memory or because of signal interruption during waits.
+ *
+ * Backup failure is easily handled by using a ttm_tt pages vector that holds
+ * both swap entries and page pointers. This has to be taken into account when
+ * restoring such a ttm_tt from backup, and when freeing it while backed up.
+ * When restoring, for simplicity, new pages are actually allocated from the
+ * pool and the contents of any old pages are copied in and then the old pages
+ * are released.
+ *
+ * For restoration failures, the struct ttm_pool_tt_restore holds sufficient 
state
+ * to be able to resume an interrupted restore, and that structure is freed 
once
+ * the restoration is complete. If the struct ttm_tt is destroyed while there
+ * is a valid struct ttm_pool_tt_restore attached, that is also properly taken
+ * care of.
+ */
+
+static bool ttm_pool_restore_valid(const struct ttm_pool_tt_restore *restore)
+{
+   return restore && restore->restored_pages < (1 << restore->order);
+}
+
+static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
+  struct ttm_ba

[PATCH v4 02/12] drm/ttm: Slightly clean up LRU list iteration

2024-06-14 Thread Thomas Hellström
To make the transition to using lru hitches easier,
simplify the ttm_resource_manager_next() interface to only take
the cursor and reuse ttm_resource_manager_next() functionality
from ttm_resource_manager_first().

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_resource.c | 48 +-
 include/drm/ttm/ttm_resource.h | 10 ---
 2 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index db9a7a3717c4..8bfbc0e8 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -496,50 +496,44 @@ struct ttm_resource *
 ttm_resource_manager_first(struct ttm_resource_manager *man,
   struct ttm_resource_cursor *cursor)
 {
-   struct ttm_lru_item *lru;
-
 lockdep_assert_held(&man->bdev->lru_lock);
 
-   for (cursor->priority = 0; cursor->priority < TTM_MAX_BO_PRIORITY;
-++cursor->priority)
-   list_for_each_entry(lru, &man->lru[cursor->priority], link) {
-   if (ttm_lru_item_is_res(lru))
-   return ttm_lru_item_to_res(lru);
-   }
-
-   return NULL;
+   cursor->priority = 0;
+   cursor->man = man;
+   cursor->cur = &man->lru[cursor->priority];
+   return ttm_resource_manager_next(cursor);
 }
 
 /**
  * ttm_resource_manager_next
  *
- * @man: resource manager to iterate over
  * @cursor: cursor to record the position
- * @res: the current resource pointer
  *
- * Returns the next resource from the resource manager.
+ * Return: the next resource from the resource manager.
  */
 struct ttm_resource *
-ttm_resource_manager_next(struct ttm_resource_manager *man,
- struct ttm_resource_cursor *cursor,
- struct ttm_resource *res)
+ttm_resource_manager_next(struct ttm_resource_cursor *cursor)
 {
-   struct ttm_lru_item *lru = &res->lru;
+   struct ttm_resource_manager *man = cursor->man;
+   struct ttm_lru_item *lru;
 
 lockdep_assert_held(&man->bdev->lru_lock);
 
-   list_for_each_entry_continue(lru, &man->lru[cursor->priority], link) {
-   if (ttm_lru_item_is_res(lru))
-   return ttm_lru_item_to_res(lru);
-   }
-
-   for (++cursor->priority; cursor->priority < TTM_MAX_BO_PRIORITY;
-++cursor->priority)
-   list_for_each_entry(lru, &man->lru[cursor->priority], link) {
-   if (ttm_lru_item_is_res(lru))
-   ttm_lru_item_to_res(lru);
+   for (;;) {
+   lru = list_entry(cursor->cur, typeof(*lru), link);
+   list_for_each_entry_continue(lru, &man->lru[cursor->priority], link) {
+   if (ttm_lru_item_is_res(lru)) {
+   cursor->cur = &lru->link;
+   return ttm_lru_item_to_res(lru);
+   }
 }
 
+   if (++cursor->priority >= TTM_MAX_BO_PRIORITY)
+   break;
+
+   cursor->cur = &man->lru[cursor->priority];
+   }
+
return NULL;
 }
 
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 1511d91e290d..7d81fd5b5b83 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -272,11 +272,15 @@ ttm_lru_item_to_res(struct ttm_lru_item *item)
 /**
  * struct ttm_resource_cursor
  *
+ * @man: The resource manager currently being iterated over.
+ * @cur: The list head the cursor currently points to.
  * @priority: the current priority
  *
  * Cursor to iterate over the resources in a manager.
  */
 struct ttm_resource_cursor {
+   struct ttm_resource_manager *man;
+   struct list_head *cur;
unsigned int priority;
 };
 
@@ -438,9 +442,7 @@ struct ttm_resource *
 ttm_resource_manager_first(struct ttm_resource_manager *man,
   struct ttm_resource_cursor *cursor);
 struct ttm_resource *
-ttm_resource_manager_next(struct ttm_resource_manager *man,
- struct ttm_resource_cursor *cursor,
- struct ttm_resource *res);
+ttm_resource_manager_next(struct ttm_resource_cursor *cursor);
 
 struct ttm_resource *
 ttm_lru_first_res_or_null(struct list_head *head);
@@ -455,7 +457,7 @@ ttm_lru_first_res_or_null(struct list_head *head);
  */
 #define ttm_resource_manager_for_each_res(man, cursor, res)\
for (res = ttm_resource_manager_first(man, cursor); res;\
-res = ttm_resource_manager_next(man, cursor, res))
+res = ttm_resource_manager_next(cursor))
 
 struct ttm_kmap_iter *
 ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io,
-- 
2.44.0



[PATCH v4 05/12] drm/ttm: Provide a generic LRU walker helper

2024-06-14 Thread Thomas Hellström
Provide a generic LRU walker in TTM, in the spirit of drm_gem_lru_scan()
but building on the restartable TTM LRU functionality.

The LRU walker optionally supports locking objects as part of
a ww mutex locking transaction, to mimic to some extent the
current functionality in ttm. However any -EDEADLK return
is converted to -ENOMEM, so that the driver will need to back
off and possibly retry without being able to keep the
ticket.
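
Caller-side, the helper is driven roughly like this (a sketch; wiring
up the process_bo() callback is left to the embedding driver):

        struct ttm_operation_ctx ctx = { .interruptible = true };
        struct ttm_lru_walk walk = {
                /* hook up the process_bo() callback per struct ttm_lru_walk */
                .ctx = &ctx,
                .ticket = NULL, /* or a ww ticket to enable ticketlocking */
        };
        long progress;

        progress = ttm_lru_walk_for_evict(&walk, bdev, man, target);
        if (progress == -ENOSPC) {
                /* A ticketing caller must back off its ww transaction. */
        }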

v3:
- Move the helper to core ttm.
- Remove the drm_exec usage from it for now, it will be
  reintroduced later in the series.
v4:
- Handle the -EALREADY case if ticketlocking.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 145 ++
 include/drm/ttm/ttm_bo.h  |  32 +++
 2 files changed, 177 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 0b3f4267130c..45fcaf6f8644 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -768,3 +768,148 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
ttm_tt_destroy(bo->bdev, ttm);
return ret;
 }
+
+static bool ttm_lru_walk_trylock(struct ttm_lru_walk *walk,
+struct ttm_buffer_object *bo,
+bool *needs_unlock)
+{
+   struct ttm_operation_ctx *ctx = walk->ctx;
+
+   *needs_unlock = false;
+
+   if (dma_resv_trylock(bo->base.resv)) {
+   *needs_unlock = true;
+   return true;
+   }
+
+   if (bo->base.resv == ctx->resv && ctx->allow_res_evict) {
+   dma_resv_assert_held(bo->base.resv);
+   return true;
+   }
+
+   return false;
+}
+
+static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
+  struct ttm_buffer_object *bo,
+  bool *needs_unlock)
+{
+   struct dma_resv *resv = bo->base.resv;
+   int ret;
+
+   if (walk->ctx->interruptible)
+   ret = dma_resv_lock_interruptible(resv, walk->ticket);
+   else
+   ret = dma_resv_lock(resv, walk->ticket);
+
+   if (!ret) {
+   *needs_unlock = true;
+   /* Only a single ticketlock per loop. */
+   walk->ticket = NULL;
+   } else if (ret == -EDEADLK) {
+   /* Caller needs to exit the ww transaction. */
+   ret = -ENOSPC;
+   }
+
+   return ret;
+}
+
+static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
+{
+   if (locked)
+   dma_resv_unlock(bo->base.resv);
+}
+
+/**
+ * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
+ * valid items.
+ * @walk: describe the walks and actions taken
+ * @bdev: The TTM device.
+ * @man: The struct ttm_resource manager whose LRU lists we're walking.
+ * @target: The end condition for the walk.
+ *
+ * The LRU lists of @man are walked, and for each struct ttm_resource encountered,
+ * the corresponding ttm_buffer_object is locked and taken a reference on, and
+ * the LRU lock is dropped. The LRU lock may be dropped before locking and, in
+ * that case, it's verified that the item actually remains on the LRU list after
+ * the lock, and that the buffer object didn't switch resource in between.
+ *
+ * With a locked object, the actions indicated by @walk->process_bo are
+ * performed, and after that, the bo is unlocked, the refcount dropped and the
+ * next struct ttm_resource is processed. Here, the walker relies on
+ * TTM's restartable LRU list implementation.
+ *
+ * Typically @walk->process_bo() would return the number of pages evicted,
+ * swapped or shrunken, so that when the total exceeds @target, or when the
+ * LRU list has been walked in full, iteration is terminated. It's also 
terminated
+ * on error. Note that the definition of @target is done by the caller, it
+ * could have a different meaning than the number of pages.
+ *
+ * Note that the way dma_resv individualization is done, locking needs to be 
done
+ * either with the LRU lock held (trylocking only) or with a reference on the
+ * object.
+ *
+ * Return: The progress made towards target or negative error code on error.
+ */
+long ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
+   struct ttm_resource_manager *man, long target)
+{
+   struct ttm_resource_cursor cursor;
+   struct ttm_resource *res;
+   long sofar = 0;
+   long lret;
+
+   spin_lock(&bdev->lru_lock);
+   ttm_resource_manager_for_each_res(man, &cursor, res) {
+   struct ttm_buffer_object *bo = res->bo;
+   bool bo_needs_unlock = false;
+   bool bo_locked = false;
+   int mem_type;
+
+   if (!bo || bo->resource != res)
+  

[PATCH v4 07/12] drm/ttm: Use the LRU walker for eviction

2024-06-14 Thread Thomas Hellström
Use the LRU walker for eviction. This helps
remove a lot of code with weird locking
semantics.

The functionality is slightly changed so that
when trylocked buffer objects are exhausted, we
continue to interleave walks with ticket-locks while
there is still progress made. The list walks are
not restarted in-between evictions.

Also provide a separate ttm_bo_evict_first()
function for its single user. The context of that
user allows sleeping dma_resv locks.
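
Its use then reduces to a sketch like the following (loop policy and
termination details are illustrative only):

        int ret;

        do {
                ret = ttm_bo_evict_first(bdev, man, &ctx);
        } while (!ret);

        /* ret reports why eviction stopped, e.g. an empty LRU list. */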

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo.c   | 350 -
 drivers/gpu/drm/ttm/ttm_resource.c |  20 +-
 include/drm/ttm/ttm_bo.h   |   8 +-
 3 files changed, 145 insertions(+), 233 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 63a91b77f7da..316afe19a325 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -224,80 +224,6 @@ static void ttm_bo_flush_all_fences(struct 
ttm_buffer_object *bo)
dma_resv_iter_end();
 }
 
-/**
- * ttm_bo_cleanup_refs
- * If bo idle, remove from lru lists, and unref.
- * If not idle, block if possible.
- *
- * Must be called with lru_lock and reservation held, this function
- * will drop the lru lock and optionally the reservation lock before returning.
- *
- * @bo:The buffer object to clean-up
- * @interruptible: Any sleeps should occur interruptibly.
- * @no_wait_gpu:   Never wait for gpu. Return -EBUSY instead.
- * @unlock_resv:   Unlock the reservation lock as well.
- */
-
-static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
-  bool interruptible, bool no_wait_gpu,
-  bool unlock_resv)
-{
-   struct dma_resv *resv = &bo->base._resv;
-   int ret;
-
-   if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP))
-   ret = 0;
-   else
-   ret = -EBUSY;
-
-   if (ret && !no_wait_gpu) {
-   long lret;
-
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-   spin_unlock(&bo->bdev->lru_lock);
-
-   lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
-interruptible,
-30 * HZ);
-
-   if (lret < 0)
-   return lret;
-   else if (lret == 0)
-   return -EBUSY;
-
-   spin_lock(&bo->bdev->lru_lock);
-   if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
-   /*
-* We raced, and lost, someone else holds the 
reservation now,
-* and is probably busy in ttm_bo_cleanup_memtype_use.
-*
-* Even if it's not the case, because we finished 
waiting any
-* delayed destruction would succeed, so just return 
success
-* here.
-*/
-   spin_unlock(&bo->bdev->lru_lock);
-   return 0;
-   }
-   ret = 0;
-   }
-
-   if (ret) {
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-   spin_unlock(&bo->bdev->lru_lock);
-   return ret;
-   }
-
-   spin_unlock(&bo->bdev->lru_lock);
-   ttm_bo_cleanup_memtype_use(bo);
-
-   if (unlock_resv)
-   dma_resv_unlock(bo->base.resv);
-
-   return 0;
-}
-
 /*
  * Block for the dma_resv object to become idle, lock the buffer and clean up
  * the resource and tt object.
@@ -505,151 +431,154 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object 
*bo,
 }
 EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
-/*
- * Check the target bo is allowable to be evicted or swapout, including cases:
- *
- * a. if share same reservation object with ctx->resv, have assumption
- * reservation objects should already be locked, so not lock again and
- * return true directly when either the opreation allow_reserved_eviction
- * or the target bo already is in delayed free list;
+/**
+ * ttm_bo_evict_first() - Evict the first bo on the manager's LRU list.
+ * @bdev: The ttm device.
+ * @man: The manager whose bo to evict.
+ * @ctx: The TTM operation ctx governing the eviction.
  *
- * b. Otherwise, trylock it.
+ * Return: 0 if successful or the resource disappeared. Negative error code on 
error.
  */
-static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-  struct ttm_operation_ctx *ctx,
-  const struct ttm_place *place,
-  bool *locked, bool *busy)
+int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man,

[PATCH v4 10/12] drm/ttm: Use fault-injection to test error paths

2024-06-14 Thread Thomas Hellström
Use fault-injection to test partial TTM swapout and interrupted swapin.
Return -EINTR for swapin to test the caller's ability to handle and
restart the swapin, and on swapout perform a partial swapout to test
the swapin and release_shrunken functionality.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/Kconfig| 10 ++
 drivers/gpu/drm/ttm/ttm_pool.c | 17 -
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 981f43d4ca8c..5f010ec31002 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -272,6 +272,16 @@ config DRM_GPUVM
  GPU-VM representation providing helpers to manage a GPUs virtual
  address space
 
+config DRM_TTM_BACKUP_FAULT_INJECT
+   bool "Enable fault injection during TTM backup"
+   depends on DRM_TTM
+   default n
+   help
+ Inject recoverable failures during TTM backup and recovery of
+ backed-up objects. For DRM driver developers only.
+
+ If in doubt, choose N.
+
 config DRM_BUDDY
tristate
depends on DRM
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 38e50cf81b0a..d32a1f2e5e50 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -431,6 +431,7 @@ static int ttm_pool_restore_tt(struct ttm_pool_tt_restore 
*restore,
   struct ttm_backup *backup,
   struct ttm_operation_ctx *ctx)
 {
+   static unsigned long __maybe_unused swappedin;
unsigned int i, nr = 1 << restore->order;
int ret = 0;
 
@@ -446,6 +447,13 @@ static int ttm_pool_restore_tt(struct ttm_pool_tt_restore 
*restore,
if (handle == 0)
continue;
 
+   if (IS_ENABLED(CONFIG_DRM_TTM_BACKUP_FAULT_INJECT) &&
+   ctx->interruptible &&
+   ++swappedin % 100 == 0) {
+   ret = -EINTR;
+   break;
+   }
+
ret = backup->ops->copy_backed_up_page
(backup, restore->first_page[i],
 handle, ctx->interruptible);
@@ -892,7 +900,14 @@ long ttm_pool_backup_tt(struct ttm_pool *pool, struct 
ttm_tt *ttm, bool purge,
 
alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN | 
__GFP_RETRY_MAYFAIL;
 
-   for (i = 0; i < ttm->num_pages; ++i) {
+   num_pages = ttm->num_pages;
+
+   /* Pretend doing fault injection by shrinking only half of the pages. */
+
+   if (IS_ENABLED(CONFIG_DRM_TTM_BACKUP_FAULT_INJECT))
+   num_pages = DIV_ROUND_UP(num_pages, 2);
+
+   for (i = 0; i < num_pages; ++i) {
page = ttm->pages[i];
if (unlikely(!page))
continue;
-- 
2.44.0



[PATCH v4 06/12] drm/ttm: Use the LRU walker helper for swapping

2024-06-14 Thread Thomas Hellström
Rework the TTM swapping to use the LRU walker helper.
This helps fix up the ttm_bo_swapout() interface
to be consistent about not requiring any locking.

For now mimic the current behaviour of using trylock
only. We could be using ticket-locks here but defer
that until it's deemed necessary. The TTM swapout
functionality is a bit weird anyway since it
alternates between memory types without exhausting
TTM_PL_SYSTEM first.
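
The gfp flags for ttm_tt_swapout() travel in a small wrapper around
the generic walk and are recovered in the process callback, mirroring
the container_of() pattern in the hunk below (a sketch only):

        struct ttm_bo_swapout_walk swapout_walk = {
                .gfp_flags = GFP_KERNEL, /* illustrative; the real flags come from the caller */
        };

        /* ...and inside the process callback, the wrapper is recovered: */
        struct ttm_bo_swapout_walk *swapout =
                container_of(walk, typeof(*swapout), walk);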

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 112 +--
 drivers/gpu/drm/ttm/ttm_device.c |  30 ++---
 include/drm/ttm/ttm_bo.h |   5 +-
 3 files changed, 83 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 43eda720657f..63a91b77f7da 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1118,11 +1118,23 @@ int ttm_bo_wait_ctx(struct ttm_buffer_object *bo, 
struct ttm_operation_ctx *ctx)
 }
 EXPORT_SYMBOL(ttm_bo_wait_ctx);
 
-int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
-  gfp_t gfp_flags)
+/**
+ * struct ttm_bo_swapout_walk - Parameters for the swapout walk
+ */
+struct ttm_bo_swapout_walk {
+   /** @walk: The walk base parameters. */
+   struct ttm_lru_walk walk;
+   /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */
+   gfp_t gfp_flags;
+};
+
+static long
+ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
 {
-   struct ttm_place place;
-   bool locked;
+   struct ttm_place place = {.mem_type = bo->resource->mem_type};
+   struct ttm_bo_swapout_walk *swapout_walk =
+   container_of(walk, typeof(*swapout_walk), walk);
+   struct ttm_operation_ctx *ctx = walk->ctx;
long ret;
 
/*
@@ -1131,28 +1143,29 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct 
ttm_operation_ctx *ctx,
 * The driver may use the fact that we're moving from SYSTEM
 * as an indication that we're about to swap out.
 */
-   memset(&place, 0, sizeof(place));
-   place.mem_type = bo->resource->mem_type;
-   if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL))
-   return -EBUSY;
+   if (!bo->bdev->funcs->eviction_valuable(bo, &place)) {
+   ret = -EBUSY;
+   goto out;
+   }
 
if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) ||
bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL ||
-   bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED ||
-   !ttm_bo_get_unless_zero(bo)) {
-   if (locked)
-   dma_resv_unlock(bo->base.resv);
-   return -EBUSY;
+   bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED) {
+   ret = -EBUSY;
+   goto out;
}
 
if (bo->deleted) {
-   ret = ttm_bo_cleanup_refs(bo, false, false, locked);
-   ttm_bo_put(bo);
-   return ret == -EBUSY ? -ENOSPC : ret;
-   }
+   pgoff_t num_pages = bo->ttm->num_pages;
 
-   /* TODO: Cleanup the locking */
-   spin_unlock(&bo->bdev->lru_lock);
+   ret = ttm_bo_wait_ctx(bo, ctx);
+   if (ret)
+   goto out;
+
+   ttm_bo_cleanup_memtype_use(bo);
+   ret = num_pages;
+   goto out;
+   }
 
/*
 * Move to system cached
@@ -1164,12 +1177,13 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct 
ttm_operation_ctx *ctx,
 memset(&hop, 0, sizeof(hop));
 place.mem_type = TTM_PL_SYSTEM;
 ret = ttm_resource_alloc(bo, &place, &evict_mem);
-   if (unlikely(ret))
+   if (ret)
goto out;
 
 ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop);
-   if (unlikely(ret != 0)) {
-   WARN(ret == -EMULTIHOP, "Unexpected multihop in swaput 
- likely driver bug.\n");
+   if (ret) {
+   WARN(ret == -EMULTIHOP,
+"Unexpected multihop in swapout - likely driver 
bug.\n");
 ttm_resource_free(bo, &evict_mem);
goto out;
}
@@ -1179,30 +1193,54 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct 
ttm_operation_ctx *ctx,
 * Make sure BO is idle.
 */
ret = ttm_bo_wait_ctx(bo, ctx);
-   if (unlikely(ret != 0))
+   if (ret)
goto out;
 
ttm_bo_unmap_virtual(bo);
-
-   /*
-* Swap out. Buffer will be swapped in again as soon as
-* anyone tries to access a ttm page.
-*/
if (bo->bdev->funcs->swap_notify)
 bo->bdev->funcs->swap_notify(bo);

[PATCH v4 08/12] drm/ttm: Add a virtual base class for graphics memory backup

2024-06-14 Thread Thomas Hellström
Initially intended for experimenting with different backup
solutions (shmem vs direct swap cache insertion), abstract
the backup destination using a virtual base class.

Also provide a sample implementation for shmem.

While, when settling on a preferred backup solution, one could
perhaps skip the abstraction, this functionality may actually
come in handy for configurable dedicated graphics memory
backup to fast nvme files or similar, without affecting
swap-space. Could indeed be useful for VRAM backup on S4 and
other cases.
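
The resulting virtual base class is roughly shaped like this (a sketch
inferred from the shmem implementation below and the call sites
elsewhere in the series; the exact header may differ):

        struct ttm_backup;

        struct ttm_backup_ops {
                void (*drop)(struct ttm_backup *backup, unsigned long handle);
                int (*copy_backed_up_page)(struct ttm_backup *backup,
                                           struct page *dst,
                                           unsigned long handle,
                                           bool killable);
                unsigned long (*backup_page)(struct ttm_backup *backup,
                                             struct page *page, bool writeback,
                                             pgoff_t i, gfp_t page_gfp,
                                             gfp_t alloc_gfp);
                void (*fini)(struct ttm_backup *backup);
        };

        struct ttm_backup {
                const struct ttm_backup_ops *ops;
        };

A backend like the shmem one below embeds struct ttm_backup and fills
in the ops with its own implementations.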

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/ttm/Makefile   |   2 +-
 drivers/gpu/drm/ttm/ttm_backup_shmem.c | 137 +
 include/drm/ttm/ttm_backup.h   | 136 
 3 files changed, 274 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/ttm/ttm_backup_shmem.c
 create mode 100644 include/drm/ttm/ttm_backup.h

diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index dad298127226..5e980dd90e41 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -4,7 +4,7 @@
 
 ttm-y := ttm_tt.o ttm_bo.o ttm_bo_util.o ttm_bo_vm.o ttm_module.o \
ttm_execbuf_util.o ttm_range_manager.o ttm_resource.o ttm_pool.o \
-   ttm_device.o ttm_sys_manager.o
+   ttm_device.o ttm_sys_manager.o ttm_backup_shmem.o
 ttm-$(CONFIG_AGP) += ttm_agp_backend.o
 
 obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_backup_shmem.c 
b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
new file mode 100644
index ..79c2f552863a
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include 
+#include 
+
+/**
+ * struct ttm_backup_shmem - A shmem based ttm_backup subclass.
+ * @backup: The base struct ttm_backup
+ * @filp: The associated shmem object
+ */
+struct ttm_backup_shmem {
+   struct ttm_backup backup;
+   struct file *filp;
+};
+
+static struct ttm_backup_shmem *to_backup_shmem(struct ttm_backup *backup)
+{
+   return container_of(backup, struct ttm_backup_shmem, backup);
+}
+
+static void ttm_backup_shmem_drop(struct ttm_backup *backup, unsigned long 
handle)
+{
+   handle -= 1;
+   shmem_truncate_range(file_inode(to_backup_shmem(backup)->filp), handle,
+handle + 1);
+}
+
+static int ttm_backup_shmem_copy_page(struct ttm_backup *backup, struct page 
*dst,
+ unsigned long handle, bool killable)
+{
+   struct file *filp = to_backup_shmem(backup)->filp;
+   struct address_space *mapping = filp->f_mapping;
+   struct folio *from_folio;
+
+   handle -= 1;
+   from_folio = shmem_read_folio(mapping, handle);
+   if (IS_ERR(from_folio))
+   return PTR_ERR(from_folio);
+
+   /* Note: Use drm_memcpy_from_wc? */
+   copy_highpage(dst, folio_file_page(from_folio, handle));
+   folio_put(from_folio);
+
+   return 0;
+}
+
+static unsigned long
+ttm_backup_shmem_backup_page(struct ttm_backup *backup, struct page *page,
+bool writeback, pgoff_t i, gfp_t page_gfp,
+gfp_t alloc_gfp)
+{
+   struct file *filp = to_backup_shmem(backup)->filp;
+   struct address_space *mapping = filp->f_mapping;
+   unsigned long handle = 0;
+   struct folio *to_folio;
+   int ret;
+
+   to_folio = shmem_read_folio_gfp(mapping, i, alloc_gfp);
+   if (IS_ERR(to_folio))
+   return handle;
+
+   folio_mark_accessed(to_folio);
+   folio_lock(to_folio);
+   folio_mark_dirty(to_folio);
+   copy_highpage(folio_file_page(to_folio, i), page);
+   handle = i + 1;
+
+   if (writeback && !folio_mapped(to_folio) && 
folio_clear_dirty_for_io(to_folio)) {
+   struct writeback_control wbc = {
+   .sync_mode = WB_SYNC_NONE,
+   .nr_to_write = SWAP_CLUSTER_MAX,
+   .range_start = 0,
+   .range_end = LLONG_MAX,
+   .for_reclaim = 1,
+   };
+   folio_set_reclaim(to_folio);
 ret = mapping->a_ops->writepage(folio_page(to_folio, 0), &wbc);
+   if (!folio_test_writeback(to_folio))
+   folio_clear_reclaim(to_folio);
+   /* If writepage succeeds, it unlocks the folio */
+   if (ret)
+   folio_unlock(to_folio);
+   } else {
+   folio_unlock(to_folio);
+   }
+
+   folio_put(to_folio);
+
+   return handle;
+}
+
+static void ttm_backup_shmem_fini(struct ttm_backup *backup)
+{
+   struct ttm_backup_shmem *sbackup = to_backup_shmem(backup);
+
+   fput(sbackup->filp);
+   kfree(sbackup);
+}
+
+static const st

[PATCH v4 04/12] drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves

2024-06-14 Thread Thomas Hellström
To address the problem with hitches moving when bulk move
sublists are lru-bumped, register the list cursors with the
ttm_lru_bulk_move structure when traversing its list, and
when lru-bumping the list, move the cursor hitch to the tail.
This also means it's mandatory for drivers to call
ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when
initializing and finalizing the bulk move structure, so add
those calls to the amdgpu- and xe driver.

Compared to v1 this is slightly more code but less fragile
and hopefully easier to understand.
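
For drivers, the now-mandatory pairing looks like this (mirroring the
amdgpu and xe hunks below; the embedded bulk move and device pointers
are placeholders):

        /* At VM / context creation: */
        ttm_lru_bulk_move_init(&vm->lru_bulk_move);

        /* At teardown, once no resources remain in the bulk move: */
        ttm_lru_bulk_move_fini(bdev, &vm->lru_bulk_move);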

Changes in previous series:
- Completely rework the functionality
- Avoid a NULL pointer dereference assigning manager->mem_type
- Remove some leftover code causing build problems
v2:
- For hitch bulk tail moves, store the mem_type in the cursor
  instead of with the manager.
v3:
- Remove leftover mem_type member from change in v2.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  4 ++
 drivers/gpu/drm/ttm/ttm_resource.c | 89 ++
 drivers/gpu/drm/xe/xe_vm.c |  4 ++
 include/drm/ttm/ttm_resource.h | 56 ++--
 4 files changed, 132 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3abfa66d72a2..97743993d711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2420,6 +2420,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
if (r)
return r;
 
+   ttm_lru_bulk_move_init(&vm->lru_bulk_move);
+
vm->is_compute_context = false;
 
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@@ -2484,6 +2486,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
 error_free_delayed:
dma_fence_put(vm->last_tlb_flush);
dma_fence_put(vm->last_unlocked);
+   ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
amdgpu_vm_fini_entities(vm);
 
return r;
@@ -2640,6 +2643,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
}
}
 
+   ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
 }
 
 /**
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 9c8b6499edfb..a03090683e79 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -33,6 +33,49 @@
 
 #include 
 
+/* Detach the cursor from the bulk move list */
+static void
+ttm_resource_cursor_clear_bulk(struct ttm_resource_cursor *cursor)
+{
+   cursor->bulk = NULL;
+   list_del_init(&cursor->bulk_link);
+}
+
+/* Move the cursor to the end of the bulk move list it's in */
+static void ttm_resource_cursor_move_bulk_tail(struct ttm_lru_bulk_move *bulk,
+  struct ttm_resource_cursor *cursor)
+{
+   struct ttm_lru_bulk_move_pos *pos;
+
+   if (WARN_ON_ONCE(bulk != cursor->bulk)) {
+   list_del_init(&cursor->bulk_link);
+   return;
+   }
+
+   pos = &bulk->pos[cursor->mem_type][cursor->priority];
+   if (pos)
+   list_move(&cursor->hitch.link, &pos->last->lru.link);
+   ttm_resource_cursor_clear_bulk(cursor);
+}
+
+/* Move all cursors attached to a bulk move to its end */
+static void ttm_bulk_move_adjust_cursors(struct ttm_lru_bulk_move *bulk)
+{
+   struct ttm_resource_cursor *cursor, *next;
+
+   list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link)
+   ttm_resource_cursor_move_bulk_tail(bulk, cursor);
+}
+
+/* Remove a cursor from an empty bulk move list */
+static void ttm_bulk_move_drop_cursors(struct ttm_lru_bulk_move *bulk)
+{
+   struct ttm_resource_cursor *cursor, *next;
+
+   list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link)
+   ttm_resource_cursor_clear_bulk(cursor);
+}
+
 /**
  * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage
  * @cursor: The struct ttm_resource_cursor to finalize.
@@ -45,6 +88,7 @@ void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor)
 {
 lockdep_assert_held(&cursor->man->bdev->lru_lock);
 list_del_init(&cursor->hitch.link);
+   ttm_resource_cursor_clear_bulk(cursor);
 }
 
 /**
@@ -73,9 +117,27 @@ void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor)
 void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk)
 {
 memset(bulk, 0, sizeof(*bulk));
+   INIT_LIST_HEAD(&bulk->cursor_list);
 }
 EXPORT_SYMBOL(ttm_lru_bulk_move_init);
 
+/**
+ * ttm_lru_bulk_move_fini - finalize a bulk move structure
+ * @bdev: The struct ttm_device
+ * @bulk: the structure to finalize
+ *
+ * Sanity checks that bulk moves don't have any
+ * resources left and hence no cursors attached.
+ */
void ttm_lru_bulk_move_fini(struct ttm_device *bdev, struct ttm_lru_bulk_move *bulk)

[PATCH v4 03/12] drm/ttm: Use LRU hitches

2024-06-14 Thread Thomas Hellström
Have iterators insert themselves into the list they are iterating
over using hitch list nodes. Since only the iterator owner
can remove these list nodes from the list, it's safe to unlock
the list and when continuing, use them as a starting point. Due to
the way LRU bumping works in TTM, newly added items will not be
missed, and bumped items will be iterated over a second time before
reaching the end of the list.
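
The pattern this enables is dropping the LRU lock mid-walk and
resuming from the hitch (a sketch; the cursor interface is the one
from the previous patch):

        struct ttm_resource_cursor cursor;
        struct ttm_resource *res;

        spin_lock(&bdev->lru_lock);
        for (res = ttm_resource_manager_first(man, &cursor); res;
             res = ttm_resource_manager_next(&cursor)) {
                spin_unlock(&bdev->lru_lock);
                /* Sleep, lock the bo, do work; the hitch keeps our place. */
                spin_lock(&bdev->lru_lock);
        }
        ttm_resource_cursor_fini_locked(&cursor);
        spin_unlock(&bdev->lru_lock);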

The exception is lists with bulk move sublists. When bumping a
sublist, a hitch that is part of that sublist will also be moved
and we might miss items if restarting from it. This will be
addressed in a later patch.

Changes in previous series:
- Updated ttm_resource_cursor_fini() documentation.
v2:
- Don't reorder ttm_resource_manager_first() and _next().
  (Christian König).
- Use list_add instead of list_move
  (Christian König)
v3:
- Split into two patches, one cleanup, one new functionality
  (Christian König)
- use ttm_resource_cursor_fini_locked() instead of open-coding
  (Matthew Brost)

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
---
 drivers/gpu/drm/ttm/ttm_bo.c   |  1 +
 drivers/gpu/drm/ttm/ttm_device.c   |  9 +++--
 drivers/gpu/drm/ttm/ttm_resource.c | 56 +-
 include/drm/ttm/ttm_resource.h |  9 +++--
 4 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6396dece0db1..43eda720657f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -621,6 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
if (locked)
dma_resv_unlock(res->bo->base.resv);
}
+   ttm_resource_cursor_fini_locked(&cursor);
 
if (!bo) {
if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 09411978a13a..f9e9b1ec8c8a 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -170,12 +170,17 @@ int ttm_device_swapout(struct ttm_device *bdev, struct 
ttm_operation_ctx *ctx,
num_pages = PFN_UP(bo->base.size);
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
-   if (!ret)
+   if (!ret) {
+   ttm_resource_cursor_fini(&cursor);
+   return num_pages;
-   if (ret != -EBUSY)
+   }
+   if (ret != -EBUSY) {
+   ttm_resource_cursor_fini(&cursor);
 return ret;
+   }
 }
 }
+   ttm_resource_cursor_fini_locked(&cursor);
 spin_unlock(&bdev->lru_lock);
return 0;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 8bfbc0e8..9c8b6499edfb 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -33,6 +33,37 @@
 
 #include 
 
+/**
+ * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage
+ * @cursor: The struct ttm_resource_cursor to finalize.
+ *
+ * The function pulls the LRU list cursor off any lists it was previously
+ * attached to. Needs to be called with the LRU lock held. The function
+ * can be called multiple times after each other.
+ */
+void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor)
+{
+   lockdep_assert_held(&cursor->man->bdev->lru_lock);
+   list_del_init(&cursor->hitch.link);
+}
+
+/**
+ * ttm_resource_cursor_fini() - Finalize the LRU list cursor usage
+ * @cursor: The struct ttm_resource_cursor to finalize.
+ *
+ * The function pulls the LRU list cursor off any lists it was previously
+ * attached to. Needs to be called without the LRU list lock held. The
+ * function can be called multiple times after each other.
+ */
+void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor)
+{
+   spinlock_t *lru_lock = &cursor->man->bdev->lru_lock;
+
+   spin_lock(lru_lock);
+   ttm_resource_cursor_fini_locked(cursor);
+   spin_unlock(lru_lock);
+}
+
 /**
  * ttm_lru_bulk_move_init - initialize a bulk move structure
  * @bulk: the structure to init
@@ -485,12 +516,15 @@ void ttm_resource_manager_debug(struct 
ttm_resource_manager *man,
 EXPORT_SYMBOL(ttm_resource_manager_debug);
 
 /**
- * ttm_resource_manager_first
- *
+ * ttm_resource_manager_first() - Start iterating over the resources
+ * of a resource manager
  * @man: resource manager to iterate over
  * @cursor: cursor to record the position
  *
- * Returns the first resource from the resource manager.
+ * Initializes the cursor and starts iterating. When done iterating,
+ * the caller must explicitly call ttm_resource_cursor_fini().
+ *
+ * Return: The first resource from the resource manager.

[PATCH v4 01/12] drm/ttm: Allow TTM LRU list nodes of different types

2024-06-14 Thread Thomas Hellström
To be able to handle list unlocking while traversing the LRU
list, we want the iterators not only to point to the next
position of the list traversal, but to insert themselves as
list nodes at that point to work around the fact that the
next node might otherwise disappear from the list while
the iterator is pointing to it.

These list nodes need to be easily distinguishable from other
list nodes so that others traversing the list can skip
over them.

So declare a struct ttm_lru_item, with a struct list_head member
and a type enum. This will slightly increase the size of a
struct ttm_resource.
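As a rough illustration, the node could look like the below; member and
enum names are assumptions inferred from the helpers used in the diff
(ttm_lru_item_is_res(), ttm_lru_item_to_res(), the ->lru.link accesses):

	enum ttm_lru_item_type {
		TTM_LRU_RESOURCE,	/* embedded in a struct ttm_resource */
		TTM_LRU_HITCH,		/* iterator placeholder, skipped by walkers */
	};

	struct ttm_lru_item {
		struct list_head link;
		enum ttm_lru_item_type type;
	};

	static inline bool ttm_lru_item_is_res(const struct ttm_lru_item *item)
	{
		return item->type == TTM_LRU_RESOURCE;
	}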

Changes in previous series:
- Update enum ttm_lru_item_type documentation.
v3:
- Introduce ttm_lru_first_res_or_null()
  (Christian König, Thomas Hellström)

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Brost 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_device.c   |  4 +-
 drivers/gpu/drm/ttm/ttm_resource.c | 89 +++---
 include/drm/ttm/ttm_resource.h | 54 +-
 3 files changed, 125 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 434cf0258000..09411978a13a 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -274,14 +274,14 @@ static void ttm_device_clear_lru_dma_mappings(struct 
ttm_device *bdev,
struct ttm_resource *res;
 
spin_lock(&bdev->lru_lock);
-   while ((res = list_first_entry_or_null(list, typeof(*res), lru))) {
+   while ((res = ttm_lru_first_res_or_null(list))) {
struct ttm_buffer_object *bo = res->bo;
 
/* Take ref against racing releases once lru_lock is unlocked */
if (!ttm_bo_get_unless_zero(bo))
continue;
 
-   list_del_init(&res->lru);
+   list_del_init(&bo->resource->lru.link);
spin_unlock(&bdev->lru_lock);
 
if (bo->ttm)
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 4a66b851b67d..db9a7a3717c4 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -70,8 +70,8 @@ void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
dma_resv_assert_held(pos->last->bo->base.resv);
 
man = ttm_manager_type(pos->first->bo->bdev, i);
-   list_bulk_move_tail(&man->lru[j], &pos->first->lru,
-   &pos->last->lru);
+   list_bulk_move_tail(&man->lru[j], &pos->first->lru.link,
+   &pos->last->lru.link);
}
}
 }
@@ -84,14 +84,38 @@ ttm_lru_bulk_move_pos(struct ttm_lru_bulk_move *bulk, 
struct ttm_resource *res)
return &bulk->pos[res->mem_type][res->bo->priority];
 }
 
+/* Return the previous resource on the list (skip over non-resource list 
items) */
+static struct ttm_resource *ttm_lru_prev_res(struct ttm_resource *cur)
+{
+   struct ttm_lru_item *lru = &cur->lru;
+
+   do {
+   lru = list_prev_entry(lru, link);
+   } while (!ttm_lru_item_is_res(lru));
+
+   return ttm_lru_item_to_res(lru);
+}
+
+/* Return the next resource on the list (skip over non-resource list items) */
+static struct ttm_resource *ttm_lru_next_res(struct ttm_resource *cur)
+{
+   struct ttm_lru_item *lru = &cur->lru;
+
+   do {
+   lru = list_next_entry(lru, link);
+   } while (!ttm_lru_item_is_res(lru));
+
+   return ttm_lru_item_to_res(lru);
+}
+
 /* Move the resource to the tail of the bulk move range */
 static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos,
   struct ttm_resource *res)
 {
if (pos->last != res) {
if (pos->first == res)
-   pos->first = list_next_entry(res, lru);
-   list_move(&res->lru, &pos->last->lru);
+   pos->first = ttm_lru_next_res(res);
+   list_move(&res->lru.link, &pos->last->lru.link);
pos->last = res;
}
 }
@@ -122,11 +146,11 @@ static void ttm_lru_bulk_move_del(struct 
ttm_lru_bulk_move *bulk,
pos->first = NULL;
pos->last = NULL;
} else if (pos->first == res) {
-   pos->first = list_next_entry(res, lru);
+   pos->first = ttm_lru_next_res(res);
} else if (pos->last == res) {
-   pos->last = list_prev_entry(res, lru);
+   pos->last = ttm_lru_prev_res(res);
} else {
-   list_move(&res->lru, &pos->last->lru);
+   list_move(&res->lru.link, &pos->last->lru.link);
}
 }
 
@@ -155,7 +179,7 @@ void ttm_resource_move_to_lru_tail(struct ttm_resourc

[PATCH v4 00/12] TTM shrinker helpers and xe buffer object shrinker

2024-06-14 Thread Thomas Hellström
This series implements TTM shrinker / eviction helpers and an xe bo
shrinker. It builds on two previous series, *and obsoletes these*. First

https://www.mail-archive.com/dri-devel@lists.freedesktop.org/msg484425.html

Second the previous TTM shrinker series

https://lore.kernel.org/linux-mm/b7491378-defd-4f1c-31e2-29e4c77e2...@amd.com/T/

Where the comment about layering
https://lore.kernel.org/linux-mm/b7491378-defd-4f1c-31e2-29e4c77e2...@amd.com/T/#ma918844aa8a6efe8768fdcda0c6590d5c93850c9

is now addressed, and this version also implements shmem objects for backup
rather than the direct swap-cache insertions used in the previous
series. It turns out that with per-page backup / shrinking, shmem objects
appear to work just as well as direct swap-cache insertions, with the
added benefit that the machinery introduced in the previous TTM shrinker
series to avoid running out of swap entries isn't really needed.

Patches 1-4 implement restartable LRU list iteration.

Patch 5 implements an LRU walker + resv locking helper

Patch 6 moves TTM swapping over to the walker.

Patch 7 moves TTM eviction over to the walker.

Patch 8 could in theory be skipped, but introduces the possibility to easily
add or test multiple backup backends, like the direct swap-cache
insertion, or even files on fast dedicated nvme storage, for example.

Patch 9 introduces helpers in the ttm_pool code for page-by-page shrinking
and recovery. It avoids having to temporarily allocate a huge amount of
memory to be able to shrink a buffer object. It also introduces the
possibility to immediately write back pages if needed, since that tends
to be a bit delayed when left to kswapd.

Patch 10 adds a simple error injection to the above code to help increase
test coverage.

Patch 11 implements an xe bo shrinker and a common helper in TTM for
shrinking.
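For orientation, the shrinker side of patch 11 hooks into the standard
core shrinker API; a heavily simplified sketch with illustrative names
(shrinkable_pages and nr_freed_pages are placeholders, not the actual
xe_shrinker.c contents):

	static unsigned long xe_shrinker_count(struct shrinker *shrink,
					       struct shrink_control *sc)
	{
		/* report the number of potentially reclaimable pages */
		return shrinkable_pages ?: SHRINK_EMPTY;
	}

	static unsigned long xe_shrinker_scan(struct shrinker *shrink,
					      struct shrink_control *sc)
	{
		/* walk the LRU with the helpers from patches 5-7, back up bos */
		return nr_freed_pages;
	}

	shrink = shrinker_alloc(0, "xe-bo-shrinker");
	shrink->count_objects = xe_shrinker_count;
	shrink->scan_objects = xe_shrinker_scan;
	shrinker_register(shrink);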

Patch 12 increases (in effect removes) the XE_PL_TT watermark. The
drm_exec locking POC patches that followed it in v3 have been dropped
from this version; see the v4 changelog below.

v2:
- Squash obsolete revision history in the patch commit messages.
- Fix a couple of review comments by Christian
- Don't store the mem_type in the TTM managers but in the
  resource cursor.
- Rename introduced TTM *back_up* function names to *backup*
- Add ttm pool recovery fault injection.
- Shrinker xe kunit test
- Various bugfixes

v3:
- Address some review comments from Matthew Brost and Christian König.
- Use the restartable LRU walk for TTM swapping and eviction.
- Provide a POC drm_exec locking implementation for exhaustive
  eviction. (Christian König).

v4:
- Remove the RFC exhaustive eviction part. While the path to exhaustive
  eviction is pretty clear and demonstrated in v3, there is still some
  drm_exec work that needs to be agreed and implemented.
- Add shrinker power management. On some hw we need to wake when shrinking.
- Fix the lru walker helper for -EALREADY errors.
- Add drm/xe: Increase the XE_PL_TT watermark.

Cc: Somalapuram Amaranath 
Cc: Christian König 
Cc: Matthew Brost 
Cc: 

Thomas Hellström (12):
  drm/ttm: Allow TTM LRU list nodes of different types
  drm/ttm: Slightly clean up LRU list iteration
  drm/ttm: Use LRU hitches
  drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist
moves
  drm/ttm: Provide a generic LRU walker helper
  drm/ttm: Use the LRU walker helper for swapping
  drm/ttm: Use the LRU walker for eviction
  drm/ttm: Add a virtual base class for graphics memory backup
  drm/ttm/pool: Provide a helper to shrink pages
  drm/ttm: Use fault-injection to test error paths
  drm/ttm, drm/xe: Add a shrinker for xe bos
  drm/xe: Increase the XE_PL_TT watermark

 drivers/gpu/drm/Kconfig|  10 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |   4 +
 drivers/gpu/drm/ttm/Makefile   |   2 +-
 drivers/gpu/drm/ttm/ttm_backup_shmem.c | 137 
 drivers/gpu/drm/ttm/ttm_bo.c   | 463 -
 drivers/gpu/drm/ttm/ttm_bo_util.c  | 212 +++
 drivers/gpu/drm/ttm/ttm_device.c   |  29 +-
 drivers/gpu/drm/ttm/ttm_pool.c | 412 +-
 drivers/gpu/drm/ttm/ttm_resource.c | 264 +++---
 drivers/gpu/drm/ttm/ttm_tt.c   |  37 ++
 drivers/gpu/drm/xe/Makefile|   1 +
 drivers/gpu/drm/xe/tests/xe_bo.c   | 118 +++
 drivers/gpu/drm/xe/tests/xe_bo_test.c  |   1 +
 drivers/gpu/drm/xe/tests/xe_bo_test.h  |   1 +
 drivers/gpu/drm/xe/xe_bo.c | 139 +++-
 drivers/gpu/drm/xe/xe_bo.h |   4 +
 drivers/gpu/drm/xe/xe_device.c |   8 +
 drivers/gpu/drm/xe/xe_device_types.h   |   2 +
 drivers/gpu/drm/xe/xe_shrinker.c   | 287 +++
 drivers/gpu/drm/xe/xe_shrinker.h   |  18 +
 drivers/gpu/drm/xe/xe_ttm_sys_mgr.c|   3 +-
 drivers/gpu/drm/xe/xe_vm.c |   4 +
 include/drm/ttm/ttm_backup.h   | 136 
 include/drm/ttm/ttm_bo.h   |  48 ++-
 include/drm/ttm

Re: [PATCH v13 12/12] drm/ttm/tests: Use u32 and u64 over uint*_t types

2024-06-11 Thread Thomas Hellström
On Mon, 2024-06-03 at 15:36 +0200, Karolina Stolarek wrote:
> Update the tests and helpers to use unsigned kernel types.
> 
> Signed-off-by: Karolina Stolarek 
> Suggested-by: Thomas Hellström 
Reviewed-by: Thomas Hellström 

> ---
>  drivers/gpu/drm/ttm/tests/ttm_bo_test.c   | 12 ++--
>  drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c |  6 ++
>  drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h |  4 ++--
>  drivers/gpu/drm/ttm/tests/ttm_pool_test.c |  2 +-
>  drivers/gpu/drm/ttm/tests/ttm_resource_test.c | 16 
>  drivers/gpu/drm/ttm/tests/ttm_tt_test.c   | 12 ++--
>  6 files changed, 25 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> index b53483ce3570..aafc22664c5e 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> @@ -237,7 +237,7 @@ static void ttm_bo_unreserve_basic(struct kunit
> *test)
>   struct ttm_place *place;
>   struct ttm_resource_manager *man;
>   unsigned int bo_prio = TTM_MAX_BO_PRIORITY - 1;
> - uint32_t mem_type = TTM_PL_SYSTEM;
> + u32 mem_type = TTM_PL_SYSTEM;
>   int err;
>  
>   place = ttm_place_kunit_init(test, mem_type, 0);
> @@ -278,7 +278,7 @@ static void ttm_bo_unreserve_pinned(struct kunit
> *test)
>   struct ttm_device *ttm_dev;
>   struct ttm_resource *res1, *res2;
>   struct ttm_place *place;
> - uint32_t mem_type = TTM_PL_SYSTEM;
> + u32 mem_type = TTM_PL_SYSTEM;
>   int err;
>  
>   ttm_dev = kunit_kzalloc(test, sizeof(*ttm_dev), GFP_KERNEL);
> @@ -322,7 +322,7 @@ static void ttm_bo_unreserve_bulk(struct kunit
> *test)
>   struct ttm_device *ttm_dev;
>   struct ttm_place *place;
>   struct dma_resv *resv;
> - uint32_t mem_type = TTM_PL_SYSTEM;
> + u32 mem_type = TTM_PL_SYSTEM;
>   unsigned int bo_priority = 0;
>   int err;
>  
> @@ -380,7 +380,7 @@ static void ttm_bo_put_basic(struct kunit *test)
>   struct ttm_resource *res;
>   struct ttm_device *ttm_dev;
>   struct ttm_place *place;
> - uint32_t mem_type = TTM_PL_SYSTEM;
> + u32 mem_type = TTM_PL_SYSTEM;
>   int err;
>  
>   place = ttm_place_kunit_init(test, mem_type, 0);
> @@ -495,7 +495,7 @@ static void ttm_bo_pin_unpin_resource(struct
> kunit *test)
>   struct ttm_resource *res;
>   struct ttm_device *ttm_dev;
>   struct ttm_place *place;
> - uint32_t mem_type = TTM_PL_SYSTEM;
> + u32 mem_type = TTM_PL_SYSTEM;
>   unsigned int bo_priority = 0;
>   int err;
>  
> @@ -546,7 +546,7 @@ static void ttm_bo_multiple_pin_one_unpin(struct
> kunit *test)
>   struct ttm_resource *res;
>   struct ttm_device *ttm_dev;
>   struct ttm_place *place;
> - uint32_t mem_type = TTM_PL_SYSTEM;
> + u32 mem_type = TTM_PL_SYSTEM;
>   unsigned int bo_priority = 0;
>   int err;
>  
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> index 48ab541f821f..c14cb2b48e68 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> @@ -42,8 +42,7 @@ static struct ttm_placement mock_placement = {
>   .placement = &mock_place,
>  };
>  
> -static struct ttm_tt *ttm_tt_simple_create(struct ttm_buffer_object
> *bo,
> -    uint32_t page_flags)
> +static struct ttm_tt *ttm_tt_simple_create(struct ttm_buffer_object
> *bo, u32 page_flags)
>  {
>   struct ttm_tt *tt;
>  
> @@ -202,8 +201,7 @@ struct ttm_buffer_object
> *ttm_bo_kunit_init(struct kunit *test,
>  }
>  EXPORT_SYMBOL_GPL(ttm_bo_kunit_init);
>  
> -struct ttm_place *ttm_place_kunit_init(struct kunit *test,
> -    uint32_t mem_type, uint32_t
> flags)
> +struct ttm_place *ttm_place_kunit_init(struct kunit *test, u32
> mem_type, u32 flags)
>  {
>   struct ttm_place *place;
>  
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h
> b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h
> index aa70b50e7640..c7da23232ffa 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h
> +++ b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h
> @@ -38,8 +38,8 @@ struct ttm_buffer_object *ttm_bo_kunit_init(struct
> kunit *test,
>       struct ttm_test_devices
> *devs,
>       size_t size,
>       struct dma_resv *obj);
> -struct ttm_place *ttm_place_kunit_init(struct kunit *test,
> -   

Re: [PATCH v13 11/12] drm/ttm/tests: Correct modules' licenses

2024-06-11 Thread Thomas Hellström
On Mon, 2024-06-03 at 15:36 +0200, Karolina Stolarek wrote:
> The test files are GPL and MIT, so update the definitions
> accordingly.
> 
> Signed-off-by: Karolina Stolarek 

Reviewed-by: Thomas Hellström 


> ---
>  drivers/gpu/drm/ttm/tests/ttm_bo_test.c   | 2 +-
>  drivers/gpu/drm/ttm/tests/ttm_device_test.c   | 2 +-
>  drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c | 2 +-
>  drivers/gpu/drm/ttm/tests/ttm_pool_test.c | 2 +-
>  drivers/gpu/drm/ttm/tests/ttm_resource_test.c | 2 +-
>  drivers/gpu/drm/ttm/tests/ttm_tt_test.c   | 2 +-
>  6 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> index ffcfe5e6709a..b53483ce3570 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> @@ -627,4 +627,4 @@ static struct kunit_suite ttm_bo_test_suite = {
>  
>  kunit_test_suites(&ttm_bo_test_suite);
>  
> -MODULE_LICENSE("GPL");
> +MODULE_LICENSE("GPL and additional rights");
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_device_test.c
> b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
> index 19eaff22e6ae..0f235a834ede 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_device_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
> @@ -209,4 +209,4 @@ static struct kunit_suite ttm_device_test_suite =
> {
>  
>  kunit_test_suites(&ttm_device_test_suite);
>  
> -MODULE_LICENSE("GPL");
> +MODULE_LICENSE("GPL and additional rights");
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> index b6ea64488c77..48ab541f821f 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> @@ -308,4 +308,4 @@ void ttm_test_devices_fini(struct kunit *test)
>  }
>  EXPORT_SYMBOL_GPL(ttm_test_devices_fini);
>  
> -MODULE_LICENSE("GPL");
> +MODULE_LICENSE("GPL and additional rights");
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
> b/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
> index 4643f91c6bd5..9070ca43df53 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
> @@ -433,4 +433,4 @@ static struct kunit_suite ttm_pool_test_suite = {
>  
>  kunit_test_suites(&ttm_pool_test_suite);
>  
> -MODULE_LICENSE("GPL");
> +MODULE_LICENSE("GPL and additional rights");
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
> b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
> index 67584058dadb..b90523422d24 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
> @@ -332,4 +332,4 @@ static struct kunit_suite ttm_resource_test_suite
> = {
>  
>  kunit_test_suites(_resource_test_suite);
>  
> -MODULE_LICENSE("GPL");
> +MODULE_LICENSE("GPL and additional rights");
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_tt_test.c
> b/drivers/gpu/drm/ttm/tests/ttm_tt_test.c
> index a9d75a33acaf..dd9bac7cb7b0 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_tt_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_tt_test.c
> @@ -398,4 +398,4 @@ static struct kunit_suite ttm_tt_test_suite = {
>  
>  kunit_test_suites(&ttm_tt_test_suite);
>  
> -MODULE_LICENSE("GPL");
> +MODULE_LICENSE("GPL and additional rights");



Re: [PATCH v13 06/12] drm/ttm/tests: Add tests with mock resource managers

2024-06-11 Thread Thomas Hellström
+ ttm_mock_manager_fini(priv->ttm_dev, fst_mem);
> + ttm_mock_manager_fini(priv->ttm_dev, tmp_mem);
> +}
> +
>  static struct kunit_case ttm_bo_validate_test_cases[] = {
>   KUNIT_CASE_PARAM(ttm_bo_init_reserved_sys_man,
> ttm_bo_types_gen_params),
> + KUNIT_CASE_PARAM(ttm_bo_init_reserved_mock_man,
> ttm_bo_types_gen_params),
>   KUNIT_CASE(ttm_bo_init_reserved_resv),
> + KUNIT_CASE_PARAM(ttm_bo_validate_basic,
> ttm_bo_types_gen_params),
>   KUNIT_CASE(ttm_bo_validate_invalid_placement),
> + KUNIT_CASE_PARAM(ttm_bo_validate_same_placement,
> +  ttm_bo_validate_mem_gen_params),
> + KUNIT_CASE(ttm_bo_validate_failed_alloc),
>   KUNIT_CASE(ttm_bo_validate_pinned),
> + KUNIT_CASE(ttm_bo_validate_busy_placement),
> + KUNIT_CASE_PARAM(ttm_bo_validate_multihop,
> ttm_bo_types_gen_params),
>   {}
>  };
>  
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> index 2f590bae53f8..cb1480e44495 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> @@ -27,8 +27,36 @@ static int mock_move(struct ttm_buffer_object *bo,
> bool evict,
>    struct ttm_resource *new_mem,
>    struct ttm_place *hop)
>  {
> - bo->resource = new_mem;
> - return 0;
> + struct ttm_resource *old_mem = bo->resource;
> +
> + if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM && !bo-
> >ttm)) {
> + ttm_bo_move_null(bo, new_mem);
> + return 0;
> + }
> +
> + if (bo->resource->mem_type == TTM_PL_VRAM &&
> +     new_mem->mem_type == TTM_PL_SYSTEM) {
> + hop->mem_type = TTM_PL_TT;
> + hop->flags = TTM_PL_FLAG_TEMPORARY;
> + hop->fpfn = 0;
> + hop->lpfn = 0;
> + return -EMULTIHOP;
> + }
> +
> + if (old_mem->mem_type == TTM_PL_SYSTEM &&
> +     new_mem->mem_type == TTM_PL_TT) {
> + ttm_bo_move_null(bo, new_mem);
> + return 0;
> + }
> +
> + if (old_mem->mem_type == TTM_PL_TT &&
> +     new_mem->mem_type == TTM_PL_SYSTEM) {
> + ttm_resource_free(bo, &bo->resource);
> + ttm_bo_assign_mem(bo, new_mem);

The above is equivalent to ttm_bo_move_null(). Replace it, or coalesce it
with the if-statement above.
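I.e. something along these lines (sketch only):

	if (!old_mem ||
	    (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) ||
	    (old_mem->mem_type == TTM_PL_SYSTEM &&
	     new_mem->mem_type == TTM_PL_TT) ||
	    (old_mem->mem_type == TTM_PL_TT &&
	     new_mem->mem_type == TTM_PL_SYSTEM)) {
		ttm_bo_move_null(bo, new_mem);
		return 0;
	}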

With that,
Reviewed-by: Thomas Hellström 

Please also send the latest version to the intel-xe list. IIRC that will
trigger the KUNIT run with the new subtests!

Thanks,
Thomas



> + return 0;
> + }
> +
> + return ttm_bo_move_memcpy(bo, ctx, new_mem);
>  }
>  
>  struct ttm_device_funcs ttm_dev_funcs = {
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
> b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
> new file mode 100644
> index ..ad77816c13ca
> --- /dev/null
> +++ b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
> @@ -0,0 +1,206 @@
> +// SPDX-License-Identifier: GPL-2.0 AND MIT
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +#include 
> +#include 
> +#include 
> +
> +#include "ttm_mock_manager.h"
> +
> +static inline struct ttm_mock_manager *
> +to_mock_mgr(struct ttm_resource_manager *man)
> +{
> + return container_of(man, struct ttm_mock_manager, man);
> +}
> +
> +static inline struct ttm_mock_resource *
> +to_mock_mgr_resource(struct ttm_resource *res)
> +{
> + return container_of(res, struct ttm_mock_resource, base);
> +}
> +
> +static int ttm_mock_manager_alloc(struct ttm_resource_manager *man,
> +   struct ttm_buffer_object *bo,
> +   const struct ttm_place *place,
> +   struct ttm_resource **res)
> +{
> + struct ttm_mock_manager *manager = to_mock_mgr(man);
> + struct ttm_mock_resource *mock_res;
> + struct drm_buddy *mm = &manager->mm;
> + u64 lpfn, fpfn, alloc_size;
> + int err;
> +
> + mock_res = kzalloc(sizeof(*mock_res), GFP_KERNEL);
> +
> + if (!mock_res)
> + return -ENOMEM;
> +
> + fpfn = 0;
> + lpfn = man->size;
> +
> + ttm_resource_init(bo, place, &mock_res->base);
> + INIT_LIST_HEAD(&mock_res->blocks);
> +
> + if (place->flags & TTM_PL_FLAG_TOPDOWN)
> + mock_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
> +
> + if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
> + mock_res->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
> +
> + alloc_size = (uint64_t)mock_re

Re: [PATCH v12 06/10] drm/ttm/tests: Add tests with mock resource managers

2024-06-03 Thread Thomas Hellström
On Mon, 2024-06-03 at 10:28 +0200, Karolina Stolarek wrote:
> > > > > diff --git a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> > > > > b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> > > > > index 2f590bae53f8..2a2585b37118 100644
> > > > > --- a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> > > > > +++ b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> > > > > @@ -27,8 +27,42 @@ static int mock_move(struct
> > > > > ttm_buffer_object
> > > > > *bo,
> > > > > bool evict,
> > > > >        struct ttm_resource *new_mem,
> > > > >        struct ttm_place *hop)
> > > > >    {
> (...)
> > > > > +
> > > > > + if (ret)
> > > > > + return ret;
> > > > > +
> > > > > + ttm_resource_free(bo, &bo->resource);
> > > > > + ttm_bo_assign_mem(bo, new_mem);
> > > > > + return 0;
> > > > > + }
> > > > > +
> > > > > + return ttm_bo_move_memcpy(bo, ctx, new_mem);
> > > > 
> > > > Do we hit this move_memcpy()? Since the mock manager doesn't
> > > > actually
> > > > reserve any memory to manager, I'd expect this to run into
> > > > problems?
> > > 
> > > We do. The mock manager has use_tt=true, so on move, we'd use
> > > ttm_kmap_iter_tt_init() for src and dest and copy the pages. I'm
> > > not
> > > sure if that's the right approach, but it enables me to test if
> > > ttm_operation_ctx's bytes_moved is correctly updated.
> > 
> > Ah, ok. It's probably not a common use-case since with both src and
> > dst
> > having use_tt, IIRC ttm should keep the pages and their content
> > mostly
> > intact across a move. So you would memcpy the source onto itself?
> > 
> > But it would give some coverage of the copy code though.
> 
> I dug around and it looks like, in the current scenario, 
> ttm_bo_move_memcpy() is just ttm_bo_move_sync_cleanup() 
> (ttm_resource_free + ttm_bo_assign_mem). That means I should revisit
> the 
> definitions of move and mock manager... I'll try to simplify them.
> 
> Do I understand correctly that we'd prefer to have a mock manager
> with 
> use_tt=false?

Yes, but then you need to allocate a chunk of contiguous memory for the
mock manager to manage. And instead of using drm_buddy you'd have to
use drm_mm to manage it, since the ttm_kmap_iter default iterators can
only handle either
a) Contiguous memory regions as returned from the drm_mm manager.
b) Fragmented memory regions as returned from the drm_buddy manager,
but in that case, they currently only handle pci io memory.

So I'd suggest going with the current code and adding a TODO to
implement a) above.
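Rough, untested sketch of what a) could look like; names are assumed
and not actual code:

	struct ttm_mock_manager {
		struct ttm_resource_manager man;
		struct drm_mm mm;	/* drm_mm instead of drm_buddy */
		spinlock_t lock;
	};

	/* allocate one contiguous node out of the managed range */
	static int mock_alloc_contiguous(struct ttm_mock_manager *manager,
					 struct drm_mm_node *node, u64 size)
	{
		int err;

		spin_lock(&manager->lock);
		err = drm_mm_insert_node(&manager->mm, node, size);
		spin_unlock(&manager->lock);

		return err;
	}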

/Thomas


> 
> All the best,
> Karolina
> 
> > 
> > /Thomas



Re: [PATCH v12 06/10] drm/ttm/tests: Add tests with mock resource managers

2024-06-03 Thread Thomas Hellström
Hi

On Mon, 2024-06-03 at 08:55 +0200, Karolina Stolarek wrote:
> Hi Thomas,
> 
> On 29.05.2024 14:58, Thomas Hellström wrote:
> > On Wed, 2024-05-15 at 13:24 +0200, Karolina Stolarek wrote:
> > > Add mock resource manager to test ttm_bo_validate() with non-
> > > system
> > > placements. Update KConfig entry to enable DRM Buddy allocator,
> > > used
> > > by the mock manager. Update move function to do more than just
> > > assign
> > > a resource.
> > > 
> > > Signed-off-by: Karolina Stolarek 
> > > Tested-by: Somalapuram, Amaranath 
> > > ---
> > >   drivers/gpu/drm/Kconfig   |   1 +
> > >   drivers/gpu/drm/ttm/tests/.kunitconfig    |   1 +
> > >   drivers/gpu/drm/ttm/tests/Makefile    |   1 +
> > >   .../gpu/drm/ttm/tests/ttm_bo_validate_test.c  | 274
> > > ++
> > >   drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c |  38 ++-
> > >   drivers/gpu/drm/ttm/tests/ttm_mock_manager.c  | 207
> > > +
> > >   drivers/gpu/drm/ttm/tests/ttm_mock_manager.h  |  31 ++
> > >   7 files changed, 551 insertions(+), 2 deletions(-)
> > >   create mode 100644 drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
> > >   create mode 100644 drivers/gpu/drm/ttm/tests/ttm_mock_manager.h
> > > 
> > > diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> > > index 026444eeb5c6..4ba16501dbf7 100644
> > > --- a/drivers/gpu/drm/Kconfig
> > > +++ b/drivers/gpu/drm/Kconfig
> > > @@ -234,6 +234,7 @@ config DRM_TTM_KUNIT_TEST
> > >   default n
> > >   depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST)
> > >   select DRM_TTM
> > > +    select DRM_BUDDY
> > >   select DRM_EXPORT_FOR_TESTS if m
> > >   select DRM_KUNIT_TEST_HELPERS
> > >   default KUNIT_ALL_TESTS
> > > diff --git a/drivers/gpu/drm/ttm/tests/.kunitconfig
> > > b/drivers/gpu/drm/ttm/tests/.kunitconfig
> > > index 1ae1ffabd51e..772f0e1f4103 100644
> > > --- a/drivers/gpu/drm/ttm/tests/.kunitconfig
> > > +++ b/drivers/gpu/drm/ttm/tests/.kunitconfig
> > > @@ -1,3 +1,4 @@
> > >   CONFIG_KUNIT=y
> > >   CONFIG_DRM=y
> > >   CONFIG_DRM_TTM_KUNIT_TEST=y
> > > +CONFIG_DRM_BUDDY=y
> > 
> > Is this strictly needed when CONFIG_DRM_TTM_KUNIT_TEST is selected?
> > Wouldn't that be enabled implicitly?
> 
> Ah, yes, that should get selected implicitly. I'll check and remove
> if 
> that works, thanks.
> 
> > 
> > > diff --git a/drivers/gpu/drm/ttm/tests/Makefile
> > > b/drivers/gpu/drm/ttm/tests/Makefile
> > > index 2e5ed63fb414..f3149de77541 100644
> > > --- a/drivers/gpu/drm/ttm/tests/Makefile
> > > +++ b/drivers/gpu/drm/ttm/tests/Makefile
> > > @@ -7,4 +7,5 @@ obj-$(CONFIG_DRM_TTM_KUNIT_TEST) += \
> > >   ttm_tt_test.o \
> > >   ttm_bo_test.o \
> > >   ttm_bo_validate_test.o \
> > > +    ttm_mock_manager.o \
> > >   ttm_kunit_helpers.o
> > > diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> > > b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> > > index a5520b0631a3..8b62d95b8ab8 100644
> > > --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> > > +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> > > @@ -8,12 +8,15 @@
> > >   #include 
> > >   
> > >   #include "ttm_kunit_helpers.h"
> > > +#include "ttm_mock_manager.h"
> > >   
> > >   #define BO_SIZE SZ_4K
> > > +#define MANAGER_SIZE SZ_1M
> > >   
> > >   struct ttm_bo_validate_test_case {
> > >   const char *description;
> > >   enum ttm_bo_type bo_type;
> > > + uint32_t mem_type;
> > 
> > Please use u32 instead of uint32_t in new code. The uint32_t usage
> > in
> > TTM is a remnant from when much of the drm- and ttm code was shared
> > with *bsd. Same in a couple of places below.
> 
> I see. So, the question is what should I do about other test code that
> is 
> already merged? Submit a separate patch to change uint32_t --> u32?

Yes, IMO that's a good idea. And at some point I think we would want to
move all of TTM over as well.

Christian, any preferences?

> 
> > 
> > >   bool with_ttm;
> > >   };
> > >   
> > > @@ -102,6 +105,49 @@ static void
> > > ttm_bo

[PATCH v2] MAINTAINERS: Update Xe driver maintainers

2024-06-02 Thread Thomas Hellström
Add Rodrigo Vivi as an Xe driver maintainer.

v2:
- Cc also Lucas De Marchi (Rodrigo Vivi)
- Remove a blank line in the commit message (Lucas De Marchi)

Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Rodrigo Vivi 
Cc: Lucas De Marchi 
Cc: dri-devel@lists.freedesktop.org
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Thomas Hellström 
Acked-by: Rodrigo Vivi 
Acked-by: Lucas De Marchi 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 572be0546e21..8f9982c99257 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11037,6 +11037,7 @@ F:  include/uapi/drm/i915_drm.h
 INTEL DRM XE DRIVER (Lunar Lake and newer)
 M: Lucas De Marchi 
 M: Thomas Hellström 
+M: Rodrigo Vivi 
 L: intel...@lists.freedesktop.org
 S: Supported
 W: https://drm.pages.freedesktop.org/intel-docs/
-- 
2.44.0



[PATCH] MAINTAINERS: Update Xe driver maintainers

2024-05-31 Thread Thomas Hellström
Add Rodrigo Vivi as an Xe driver maintainer.

Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Rodrigo Vivi 
Cc: dri-devel@lists.freedesktop.org
Cc: linux-ker...@vger.kernel.org

Signed-off-by: Thomas Hellström 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 572be0546e21..8f9982c99257 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11037,6 +11037,7 @@ F:  include/uapi/drm/i915_drm.h
 INTEL DRM XE DRIVER (Lunar Lake and newer)
 M: Lucas De Marchi 
 M: Thomas Hellström 
+M: Rodrigo Vivi 
 L: intel...@lists.freedesktop.org
 S: Supported
 W: https://drm.pages.freedesktop.org/intel-docs/
-- 
2.44.0



Re: [PATCH v12 06/10] drm/ttm/tests: Add tests with mock resource managers

2024-05-29 Thread Thomas Hellström
On Wed, 2024-05-15 at 13:24 +0200, Karolina Stolarek wrote:
> Add mock resource manager to test ttm_bo_validate() with non-system
> placements. Update KConfig entry to enable DRM Buddy allocator, used
> by the mock manager. Update move function to do more than just assign
> a resource.
> 
> Signed-off-by: Karolina Stolarek 
> Tested-by: Somalapuram, Amaranath 
> ---
>  drivers/gpu/drm/Kconfig   |   1 +
>  drivers/gpu/drm/ttm/tests/.kunitconfig    |   1 +
>  drivers/gpu/drm/ttm/tests/Makefile    |   1 +
>  .../gpu/drm/ttm/tests/ttm_bo_validate_test.c  | 274
> ++
>  drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c |  38 ++-
>  drivers/gpu/drm/ttm/tests/ttm_mock_manager.c  | 207 +
>  drivers/gpu/drm/ttm/tests/ttm_mock_manager.h  |  31 ++
>  7 files changed, 551 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
>  create mode 100644 drivers/gpu/drm/ttm/tests/ttm_mock_manager.h
> 
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 026444eeb5c6..4ba16501dbf7 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -234,6 +234,7 @@ config DRM_TTM_KUNIT_TEST
>  default n
>  depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST)
>  select DRM_TTM
> +    select DRM_BUDDY
>  select DRM_EXPORT_FOR_TESTS if m
>  select DRM_KUNIT_TEST_HELPERS
>  default KUNIT_ALL_TESTS
> diff --git a/drivers/gpu/drm/ttm/tests/.kunitconfig
> b/drivers/gpu/drm/ttm/tests/.kunitconfig
> index 1ae1ffabd51e..772f0e1f4103 100644
> --- a/drivers/gpu/drm/ttm/tests/.kunitconfig
> +++ b/drivers/gpu/drm/ttm/tests/.kunitconfig
> @@ -1,3 +1,4 @@
>  CONFIG_KUNIT=y
>  CONFIG_DRM=y
>  CONFIG_DRM_TTM_KUNIT_TEST=y
> +CONFIG_DRM_BUDDY=y

Is this strictly needed when CONFIG_DRM_TTM_KUNIT_TEST is selected?
Wouldn't that be enabled implicitly?

> diff --git a/drivers/gpu/drm/ttm/tests/Makefile
> b/drivers/gpu/drm/ttm/tests/Makefile
> index 2e5ed63fb414..f3149de77541 100644
> --- a/drivers/gpu/drm/ttm/tests/Makefile
> +++ b/drivers/gpu/drm/ttm/tests/Makefile
> @@ -7,4 +7,5 @@ obj-$(CONFIG_DRM_TTM_KUNIT_TEST) += \
>  ttm_tt_test.o \
>  ttm_bo_test.o \
>  ttm_bo_validate_test.o \
> +    ttm_mock_manager.o \
>  ttm_kunit_helpers.o
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> index a5520b0631a3..8b62d95b8ab8 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> @@ -8,12 +8,15 @@
>  #include 
>  
>  #include "ttm_kunit_helpers.h"
> +#include "ttm_mock_manager.h"
>  
>  #define BO_SIZE  SZ_4K
> +#define MANAGER_SIZE SZ_1M
>  
>  struct ttm_bo_validate_test_case {
>   const char *description;
>   enum ttm_bo_type bo_type;
> + uint32_t mem_type;

Please use u32 instead of uint32_t in new code. The uint32_t usage in
TTM is a remnant from when much of the drm- and ttm code was shared
with *bsd. Same in a couple of places below.

>   bool with_ttm;
>  };
>  
> @@ -102,6 +105,49 @@ static void ttm_bo_init_reserved_sys_man(struct
> kunit *test)
>   ttm_bo_put(bo);
>  }
>  
> +static void ttm_bo_init_reserved_mock_man(struct kunit *test)
> +{
> + const struct ttm_bo_validate_test_case *params = test-
> >param_value;
> + enum ttm_bo_type bo_type = params->bo_type;
> + struct ttm_test_devices *priv = test->priv;
> + uint32_t size = ALIGN(BO_SIZE, PAGE_SIZE);
> + struct ttm_operation_ctx ctx = { };
> + struct ttm_placement *placement;
> + uint32_t mem_type = TTM_PL_VRAM;
> + struct ttm_buffer_object *bo;
> + struct ttm_place *place;
> + int err;
> +
> + ttm_mock_manager_init(priv->ttm_dev, mem_type,
> MANAGER_SIZE);
> +
> + bo = kunit_kzalloc(test, sizeof(*bo), GFP_KERNEL);
> + KUNIT_ASSERT_NOT_NULL(test, bo);
> +
> + place = ttm_place_kunit_init(test, mem_type, 0);
> + placement = ttm_placement_kunit_init(test, place, 1);
> +
> + drm_gem_private_object_init(priv->drm, &bo->base, size);
> +
> + err = ttm_bo_init_reserved(priv->ttm_dev, bo, bo_type,
> placement,
> +    PAGE_SIZE, &ctx, NULL, NULL,
> +    &dummy_ttm_bo_destroy);
> + dma_resv_unlock(bo->base.resv);
> +
> + KUNIT_EXPECT_EQ(test, err, 0);
> + KUNIT_EXPECT_EQ(test, kref_read(&bo->kref), 1);
> + KUNIT_EXPECT_PTR_EQ(test, bo->bdev, priv->ttm_dev);
> + KUNIT_EXPECT_EQ(test, bo->type, bo_type);
> + KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
> +
> + if (bo_type != ttm_bo_type_kernel)
> + KUNIT_EXPECT_TRUE(test,
> +   drm_mm_node_allocated(
> &bo->base.vma_node.vm_node));
> +
> + ttm_resource_free(bo, &bo->resource);
> + ttm_bo_put(bo);
> + ttm_mock_manager_fini(priv->ttm_dev, mem_type);
> +}
> +
>  static void 

Re: [PATCH] drm/tests: Add a missing Kconfig select

2024-05-29 Thread Thomas Hellström
On Wed, 2024-05-29 at 11:40 +0200, Maxime Ripard wrote:
> Hi,
> 
> Thanks for sending that patch
> 
> On Wed, May 29, 2024 at 11:19:55AM GMT, Thomas Hellström wrote:
> > Fix the following warning:
> > 
> > WARNING: unmet direct dependencies detected for
> > DRM_DISPLAY_HDMI_STATE_HELPER
> >   Depends on [n]: HAS_IOMEM [=y] && DRM [=y] && DRM_DISPLAY_HELPER
> > [=y] && DRM_DISPLAY_HDMI_HELPER [=n]
> >   Selected by [y]:
> >   - DRM_KUNIT_TEST [=y] && HAS_IOMEM [=y] && DRM [=y] && KUNIT [=y]
> > && MMU [=y]
> > 
> > Signed-off-by: Thomas Hellström 
> > Fixes: 54cb39e2293b ("drm/connector: hdmi: Create an HDMI sub-
> > state")
> > Cc: Maxime Ripard 
> > Cc: dri-devel@lists.freedesktop.org
> 
> I already sent a fix for that one, and just merged it:
> https://lore.kernel.org/r/20240529080013.2325748-1-mrip...@kernel.org
> 
> Let me know if it doesn't fix it for you
> 
> Maxime

Ah, great. It blocked the xe CI but this should hopefully fix it.

Thanks,
Thomas



[PATCH] drm/tests: Add a missing Kconfig select

2024-05-29 Thread Thomas Hellström
Fix the following warning:

WARNING: unmet direct dependencies detected for DRM_DISPLAY_HDMI_STATE_HELPER
  Depends on [n]: HAS_IOMEM [=y] && DRM [=y] && DRM_DISPLAY_HELPER [=y] && 
DRM_DISPLAY_HDMI_HELPER [=n]
  Selected by [y]:
  - DRM_KUNIT_TEST [=y] && HAS_IOMEM [=y] && DRM [=y] && KUNIT [=y] && MMU [=y]

Signed-off-by: Thomas Hellström 
Fixes: 54cb39e2293b ("drm/connector: hdmi: Create an HDMI sub-state")
Cc: Maxime Ripard 
Cc: dri-devel@lists.freedesktop.org
---
 drivers/gpu/drm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 9703429de6b9..47592b6fc868 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -79,6 +79,7 @@ config DRM_KUNIT_TEST
depends on DRM && KUNIT && MMU
select DRM_BUDDY
select DRM_DISPLAY_DP_HELPER
+   select DRM_DISPLAY_HDMI_HELPER
select DRM_DISPLAY_HDMI_STATE_HELPER
select DRM_DISPLAY_HELPER
select DRM_EXEC
-- 
2.44.0



Re: [RFC PATCH v3 13/21] drm/exec: Rework contended locking

2024-05-29 Thread Thomas Hellström
On Tue, 2024-05-28 at 13:03 +0200, Christian König wrote:
> Am 28.05.24 um 10:07 schrieb Thomas Hellström:
> > On Tue, 2024-05-28 at 08:51 +0200, Christian König wrote:
> > > > 2) Any contended lock held at loop start is completely
> > > > encapsulated
> > > > in
> > > > the ww transaction and can and will be unlocked when exiting
> > > > it, so
> > > > this patch doesn't introduce any additional problems for
> > > > userptr
> > > > handling AFAICT.
> > > The drm_exec object was intentionally design to not have anything
> > > locked
> > > at the beginning of the loop. See the discussion I had with Sima
> > > around
> > > that when pushing the drm_exec object upstream.
> > > 
> > > I would really like to stick with that design and honestly don't
> > > see
> > > the
> reason to change that. Contending on a trylock seems to be much
> > > more
> > > questionable.
> > The change here is to make sure we *don't* have contention in a
> > trylock, which is otherwise inherent in the current drm_exec
> > design.
> 
> My sentence was probably a bit misleading. What I wanted to say is
> that 
> trylock as first thing in the loop sounds really odd to me.
> 
> See the intention of a trylock is to acquire something optional. What
> we 
> do for the freshly allocated BO and the 'don't try to block with the 
> mmap lock held' case is actually kind of something different.
> 
> A clean approach would be to have the BO initialization and
> backing 
> store allocation steps separated. In this case you don't even need to
> use trylock here.
> 
> And for the VM fault locking case the clean approach would be to tell
> the drm_exec object of the vm_fault parameters so that this helper
> can 
> do the drop all locks, drop the mmap lock, acquire the blocking lock
> and 
> return -EAGAIN.
> 
> This has the huge benefit that we not only stop blocking for the
> faulted 
> BO, but eventually all others which might need to move so that the 
> faulted BO is CPU accessible. I think that this is actually the more 
> problematic case.

Yes, this fault handler approach sounds ok to me. Do you mean we make
drm_exec aware of both the mmap lock and the desire not to block while
it is held? I figure that could come in handy for SVM gpu pagefaults as
well.

For the bo initialization, the suggested solution there deviates a lot
from the non-exec case, where a never-failing trylock on creation is
sometimes necessary. This will probably be an unwanted obstacle for
drm_exec conversion. 

What do you think about the following for bo init? This basically
means the user of drm_exec can *choose* to avoid trylock problems
already at the start of the drm_exec loop.

https://patchwork.freedesktop.org/patch/595863/?series=133643=7
and
https://patchwork.freedesktop.org/patch/595866/?series=133643=7

Then if we also do the fault handler mode we could drop this patch,
assuming that drivers have all the tools to handle existing cases of
ww_mutex_trylock().


> 
> > What I'm trying to say here is that when we end up with the contended
> > lock grabbed at loop start, you already conceptually have a
> > conflicting lock held (the ww_class::acquire_key). Both of these can
> > be resolved.
> 
> Yeah, I'm perfectly aware of that. But this is just a shortcoming of 
> lockdep and not a real problem.
> 
> During the drm_exec code review we already moved the
> ww_acquire_init() 
> into the cleanup function so that it's only called at the start of
> the 
> loop. Background is that we ran into lockdep warnings with that
> otherwise.
> 
> But functionally it would still work if we do this in drm_exec_ini().
> 
> > > > 3) The need for a fully capable ww transaction helper moving
> > > > forward.
> > > > If we need a tool that also does userptr locking, then I think
> > > > we
> > > > need
> > > > to separate that from the ww transaction tool and only pass the
> > > > latter
> > > > around to TTM.
> > > drm_exec is *not* meant to be a ww_transaction helper.
> > > 
> > > The functionality here is to support drivers in their CS
> > > interface
> > > and
> > > that includes userptr handling as well as a couple of other
> > > things.
> > Then if so, I don't think drm_exec is the correct functionality to
> > pass
> > to TTM to resolve the eviction issues, but rather a ww transaction
> > helper that can be used standalone *and* by drm_exec. Now the
> > functionality would be more or less what drm exec is today, 

Re: [PATCH] drm/xe: replace format-less snprintf() with strscpy()

2024-05-28 Thread Thomas Hellström
On Tue, 2024-05-28 at 15:32 +0200, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> Using snprintf() with a format string from task->comm is a bit
> dangerous since the string may be controlled by unprivileged
> userspace:
> 
> drivers/gpu/drm/xe/xe_devcoredump.c: In function
> 'devcoredump_snapshot':
> drivers/gpu/drm/xe/xe_devcoredump.c:184:9: error: format not a string
> literal and no format arguments [-Werror=format-security]
>   184 | snprintf(ss->process_name, sizeof(ss->process_name),
> process_name);
>   | ^~~~
> 
> In this case there is no reason for an snprintf(), so use a simpler
> string copy.
> 
> Fixes: b10d0c5e9df7 ("drm/xe: Add process name to devcoredump")
> Signed-off-by: Arnd Bergmann 
Reviewed-by: Thomas Hellström 

Thanks,

Will pick up and apply as soon as our CI is fit for fight.
/Thomas
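For context, the class of bug being fixed, in miniature (illustrative
snippet, not from the driver):

	char buf[32];

	/* BAD: comm is user-controlled; any '%' in it is interpreted */
	snprintf(buf, sizeof(buf), process_name);

	/* OK: plain bounded copy, no format interpretation */
	strscpy(buf, process_name, sizeof(buf));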



> ---
>  drivers/gpu/drm/xe/xe_devcoredump.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c
> b/drivers/gpu/drm/xe/xe_devcoredump.c
> index 1643d44f8bc4..1973bfaece40 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> @@ -181,7 +181,7 @@ static void devcoredump_snapshot(struct
> xe_devcoredump *coredump,
>   if (task)
>   process_name = task->comm;
>   }
> - snprintf(ss->process_name, sizeof(ss->process_name),
> process_name);
> + strscpy(ss->process_name, process_name);
>   if (task)
>   put_task_struct(task);
>  



Re: [RFC PATCH v3 13/21] drm/exec: Rework contended locking

2024-05-28 Thread Thomas Hellström
On Tue, 2024-05-28 at 08:51 +0200, Christian König wrote:
> Hi Thomas,
> 
> Am 28.05.24 um 08:36 schrieb Thomas Hellström:
> > Hi, Christian.
> > 
> > I'd appreciate if you could respond to the below, since it is a bit
> > hard to try to design around a problem I don't believe exists, and
> > come
> > up with a good solution for that.
> > 
> > In short.
> > 1) To prefault userptr we have to exit the ww transaction anyway.
> 
> Yeah and I would rather like to have that handling in drm_exec at
> some time.
> 
> Basically a GEM object with outdated struct pages backing it can be 
> handled in the same loop at the ww transaction.

OK.

> 
> > 2) Any contended lock held at loop start is completely encapsulated
> > in
> > the ww transaction and can and will be unlocked when exiting it, so
> > this patch doesn't introduce any additional problems for userptr
> > handling AFAICT.
> 
> The drm_exec object was intentionally design to not have anything
> locked 
> at the beginning of the loop. See the discussion I had with Sima
> around 
> that when pushing the drm_exec object upstream.
> 
> I would really like to stick with that design and honestly don't see
> the 
> reason to change that. Contending on a trylock seems to be much more
> questionable.

The change here is to make sure we *don't* have contention in a
trylock, which is otherwise inherent in the current drm_exec design.

What I'm trying to say here is that when we end up with the contended lock
grabbed at loop start, you already conceptually have a conflicting lock
held (the ww_class::acquire_key). Both of these can be resolved.

> 
> > 3) The need for a fully capable ww transaction helper moving
> > forward.
> > If we need a tool that also does userptr locking, then I think we
> > need
> > to separate that from the ww transaction tool and only pass the
> > latter
> > around to TTM.
> 
> drm_exec is *not* meant to be a ww_transaction helper.
> 
> The functionality here is to support drivers in their CS interface
> and 
> that includes userptr handling as well as a couple of other things.

Then if so, I don't think drm_exec is the correct functionality to pass
to TTM to resolve the eviction issues, but rather a ww transaction
helper that can be used standalone *and* by drm_exec. Now the
functionality would be more or less what drm exec is today, but
slightly augmented.

But then IMHO instead of changing name and more or less replicating
what drm_exec is today wouldn't it be a better idea to subclass
drm_exec into a full-fledged CS helper at the time when that
functionality is indeed added?

/Thomas

> 
> Regards,
> Christian.
> 
> > 
> > Thanks,
> > Thomas
> > 
> > On Wed, 2024-05-22 at 19:42 +0200, Thomas Hellström wrote:
> > > On Wed, 2024-05-22 at 18:52 +0200, Christian König wrote:
> > > > Am 22.05.24 um 16:32 schrieb Thomas Hellström:
> > > > > On Wed, 2024-05-22 at 07:52 +0200, Christian König wrote:
> > > > > > Am 21.05.24 um 09:16 schrieb Thomas Hellström:
> > > > > > > If contention and backoff occurs during a drm_exec ww
> > > > > > > transaction,
> > > > > > > the contended lock is not locked again until the next
> > > > > > > ordinary
> > > > > > > attempt to lock a dma_resv lock. However, with the
> > > > > > > introduction
> > > > > > > of
> > > > > > > drm_exec_trylock(), that doesn't work, since the locking
> > > > > > > of
> > > > > > > the
> > > > > > > contended lock needs to be a sleeping lock. Neither can
> > > > > > > we
> > > > > > > ignore
> > > > > > > locking the contended lock during a trylock since that
> > > > > > > would
> > > > > > > violate
> > > > > > > at least the ww_mutex annotations.
> > > > > > > 
> > > > > > > So resolve this by actually locking the contended lock
> > > > > > > during
> > > > > > > drm_exec_retry_on_contention(). However, this introduces
> > > > > > > a
> > > > > > > new
> > > > > > > point
> > > > > > > of failure since locking the contended lock may return -
> > > > > > > EINTR.
> > > > > > > 
> > > > > > > Hence drm_exec_retry_on_contention() must take an error
> > > > > > > parameter
> > &

Re: [RFC PATCH v3 13/21] drm/exec: Rework contended locking

2024-05-28 Thread Thomas Hellström
Hi, Christian.

I'd appreciate if you could respond to the below, since it is a bit
hard to try to design around a problem I don't believe exists, and come
up with a good solution for that.

In short.
1) To prefault userptr we have to exit the ww transaction anyway.
2) Any contended lock held at loop start is completely encapsulated in
the ww transaction and can and will be unlocked when exiting it, so
this patch doesn't introduce any additional problems for userptr
handling AFAICT.
3) The need for a fully capable ww transaction helper moving forward.
If we need a tool that also does userptr locking, then I think we need
to separate that from the ww transaction tool and only pass the latter
around to TTM.
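For reference, with the reworked retry from patch 13 the loop shape
would become roughly the following; illustrative only, and the retry
signature is my reading of the patch description, not a final API:

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
	drm_exec_until_all_locked(&exec) {
		ret = drm_exec_lock_obj(&exec, &bo->base);
		/* relocks a contended lock; may now fail with -EINTR */
		ret = drm_exec_retry_on_contention(&exec, ret);
		if (ret)
			break;
	}
	drm_exec_fini(&exec);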

Thanks,
Thomas

On Wed, 2024-05-22 at 19:42 +0200, Thomas Hellström wrote:
> On Wed, 2024-05-22 at 18:52 +0200, Christian König wrote:
> > Am 22.05.24 um 16:32 schrieb Thomas Hellström:
> > > On Wed, 2024-05-22 at 07:52 +0200, Christian König wrote:
> > > > Am 21.05.24 um 09:16 schrieb Thomas Hellström:
> > > > > If contention and backoff occurs during a drm_exec ww
> > > > > transaction,
> > > > > > the contended lock is not locked again until the next ordinary
> > > > > attempt to lock a dma_resv lock. However, with the
> > > > > introduction
> > > > > of
> > > > > drm_exec_trylock(), that doesn't work, since the locking of
> > > > > the
> > > > > contended lock needs to be a sleeping lock. Neither can we
> > > > > ignore
> > > > > locking the contended lock during a trylock since that would
> > > > > violate
> > > > > at least the ww_mutex annotations.
> > > > > 
> > > > > So resolve this by actually locking the contended lock during
> > > > > drm_exec_retry_on_contention(). However, this introduces a
> > > > > new
> > > > > point
> > > > > of failure since locking the contended lock may return -
> > > > > EINTR.
> > > > > 
> > > > > Hence drm_exec_retry_on_contention() must take an error
> > > > > parameter
> > > > > and
> > > > > also return a value indicating success.
> > > > After thinking more about that I have to pretty clearly NAK
> > > > this.
> > > >    
> > > I thought we were beyond upfront NAKing in the first reply :/
> > 
> > Well my memory could fail me, but I mentioned concerns on this
> > approach 
> > before.
> > 
> > I was a bit annoyed seeing that again. But could as well be that my
> > response never got out or that I'm mixing things up.
> 
> I haven't seen it at least. Last discussion on this I saw was
> here. I didn't see a follow-up on that.
> 
> https://lore.kernel.org/dri-devel/953c157bf69df12d831a781f0f638d93717bb044.ca...@linux.intel.com/
> 
> 
> > 
> > > > It's an intentional design decision to guarantee that at the
> > > > start of
> > > > the loop no object is locked.
> > > > 
> > > > This is because Sima and I wanted to integrate userptr handling
> > > > into
> > > > drm_exec as well in the long term.
> > > First I agree the interface looks worse with this patch.
> > > But I thought generic userptr handling were going to end up as a
> > > gpuvm
> > > helper (without using GEM objects) as we've discussed previously.
> > 
> > We might be talking past each other. That sounds like SVM, e.g. on 
> > demand paging.
> > 
> > What I mean is pre-faulting during command submission like radeon, 
> > amdgpu and i915 do for the userptr handling.
> 
> Yes, then we're talking about the same thing.
> 
> We discussed in this thread here, started by Dave.
> 
> https://lore.kernel.org/dri-devel/CAPM=9twPgn+fpbkig0Vhjt=cJdHQFbNH_Z=srhszwuvlkha...@mail.gmail.com/
> 
> I still think the right place is in drm_gpuvm for this sort of stuff.
> And I think that's the concluding argument by Sima as well.
> 
In any case, if the planned drm_exec development is to be a full
> execbuf helper, I think we need a capable sub-helper for ONLY the ww
> transaction locking as well, with support for the various locking
> primitives. In particular if we're going to be able to port i915 ww
> transaction locking over. There are more uses of the ww locking
transactions than execbuf.
> 
> > 
> > For that you need to re-start the whole handling similar to how you
> > need 
> > to re-start for the mutex locking when you detect that the page
> 

Re: [PATCH v12 10/10] drm/ttm/tests: Add TODO file

2024-05-24 Thread Thomas Hellström
On Wed, 2024-05-15 at 13:24 +0200, Karolina Stolarek wrote:
> List improvements for the test suite with some notes.
> 
> Signed-off-by: Karolina Stolarek 
LGTM.
Reviewed-by: Thomas Hellström 

> ---
>  drivers/gpu/drm/ttm/tests/TODO | 25 +
>  1 file changed, 25 insertions(+)
>  create mode 100644 drivers/gpu/drm/ttm/tests/TODO
> 
> diff --git a/drivers/gpu/drm/ttm/tests/TODO
> b/drivers/gpu/drm/ttm/tests/TODO
> new file mode 100644
> index ..b48d83b6166e
> --- /dev/null
> +++ b/drivers/gpu/drm/ttm/tests/TODO
> @@ -0,0 +1,25 @@
> +TODO
> +=
> +
> +- Add a test case where the only evictable BO is busy
> +- Update eviction tests so they use parametrized "from" memory type
> +- Improve mock manager's implementation, e.g. allocate a block of
> +  dummy memory that can be used when testing page mapping functions
> +- Suggestion: Add test cases with external BOs
> +- Suggestion: randomize the number and size of tested buffers in
> +  ttm_bo_validate()
> +- Agree on the naming convention
> +
> +Notes and gotchas
> +=
> +
> +- These tests are built and run with a UML kernel, because
> +  1) We are interested in hardware-independent testing
> +  2) We don't want to have actual DRM devices interacting with TTM
> + at the same time as the test one. Getting these to work in
> + parallel would require some time (...and that's a "todo" in
> itself!)
> +- Triggering ttm_bo_vm_ops callbacks from KUnit (i.e. kernel) might
> be
> +  a challenge, but is worth trying. Look at selftests like
> +  i915/gem/selftests/i915_gem_mman.c for inspiration
> +- The test suite uses UML where the ioremap() call returns NULL, meaning
> that
> +  ttm_bo_ioremap() can't be tested, unless we find a way to stub it



Re: [PATCH v12 01/10] drm/ttm/tests: Fix a warning in ttm_bo_unreserve_bulk

2024-05-24 Thread Thomas Hellström
On Wed, 2024-05-15 at 13:24 +0200, Karolina Stolarek wrote:
> BOs in a bulk move have to share the same reservation object. That is
> not the case in the ttm_bo_unreserve_bulk subtest. Update
> ttm_bo_kunit_init() helper to accept dma_resv object so we can define
> buffer objects that share the same resv. Update calls to that helper
> accordingly.
> 
> Fixes: 995279d280d1 ("drm/ttm/tests: Add tests for ttm_bo functions")
> Suggested-by: Christian König 
> Signed-off-by: Karolina Stolarek 

Reviewed-by: Thomas Hellström 
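For reference, the updated helper lets the bulk-move test share one
reservation object along these lines (sketch):

	struct dma_resv *resv = kunit_kzalloc(test, sizeof(*resv), GFP_KERNEL);

	dma_resv_init(resv);
	/* both bos in the bulk move now share the same resv */
	bo1 = ttm_bo_kunit_init(test, test->priv, BO_SIZE, resv);
	bo2 = ttm_bo_kunit_init(test, test->priv, BO_SIZE, resv);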


> ---
>  drivers/gpu/drm/ttm/tests/ttm_bo_test.c   | 40 +++--
> --
>  drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c |  7 +++-
>  drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h |  3 +-
>  drivers/gpu/drm/ttm/tests/ttm_pool_test.c |  4 +-
>  drivers/gpu/drm/ttm/tests/ttm_resource_test.c |  2 +-
>  drivers/gpu/drm/ttm/tests/ttm_tt_test.c   | 20 +-
>  6 files changed, 45 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> index 1f8a4f8adc92..ffcfe5e6709a 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
> @@ -56,7 +56,7 @@ static void
> ttm_bo_reserve_optimistic_no_ticket(struct kunit *test)
>   struct ttm_buffer_object *bo;
>   int err;
>  
> - bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE);
> + bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL);
>  
>   err = ttm_bo_reserve(bo, params->interruptible, params-
> >no_wait, NULL);
>   KUNIT_ASSERT_EQ(test, err, 0);
> @@ -71,7 +71,7 @@ static void ttm_bo_reserve_locked_no_sleep(struct
> kunit *test)
>   bool no_wait = true;
>   int err;
>  
> - bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE);
> + bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL);
>  
>   /* Let's lock it beforehand */
>   dma_resv_lock(bo->base.resv, NULL);
> @@ -92,7 +92,7 @@ static void ttm_bo_reserve_no_wait_ticket(struct
> kunit *test)
>  
>   ww_acquire_init(&ctx, &reservation_ww_class);
>  
> - bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE);
> + bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL);
>  
>   err = ttm_bo_reserve(bo, interruptible, no_wait, &ctx);
>   KUNIT_ASSERT_EQ(test, err, -EBUSY);
> @@ -110,7 +110,7 @@ static void ttm_bo_reserve_double_resv(struct
> kunit *test)
>  
>   ww_acquire_init(&ctx, &reservation_ww_class);
>  
> - bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE);
> + bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL);
>  
>   err = ttm_bo_reserve(bo, interruptible, no_wait, &ctx);
>   KUNIT_ASSERT_EQ(test, err, 0);
> @@ -138,8 +138,8 @@ static void ttm_bo_reserve_deadlock(struct kunit
> *test)
>   bool no_wait = false;
>   int err;
>  
> - bo1 = ttm_bo_kunit_init(test, test->priv, BO_SIZE);
> - bo2 = ttm_bo_kunit_init(test, test->priv, BO_SIZE);
> + bo1 = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL);
> + bo2 = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL);
>  
>   ww_acquire_init(&ctx, &reservation_ww_class);
>   mutex_lock(&bo2->base.resv->lock.base);
> @@ -208,7 +208,7 @@ static void ttm_bo_reserve_interrupted(struct
> kunit *test)
>   struct task_struct *task;
>   int err;
>  
> - bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE);
> + bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL);
>  
>   task = kthread_create(threaded_ttm_bo_reserve, bo, "ttm-bo-
> reserve");
>  
> @@ -249,7 +249,7 @@ static void ttm_bo_unreserve_basic(struct kunit
> *test)
>   KUNIT_ASSERT_EQ(test, err, 0);
>   priv->ttm_dev = ttm_dev;
>  
> - bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE);
> + bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL);
>   bo->priority = bo_prio;
>  
>   err = ttm_resource_alloc(bo, place, &res1);
> @@ -288,7 +288,7 @@ static void ttm_bo_unreserve_pinned(struct kunit
> *test)
>   KUNIT_ASSERT_EQ(test, err, 0);
>   priv->ttm_dev = ttm_dev;
>  
> - bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE);
> + bo = ttm_bo_kunit_init(test, test->priv, BO_SIZE, NULL);
>   place = ttm_place_kunit_init(test, mem_type, 0);
>  
>   dma_resv_lock(bo->base.resv, NULL);
> @@ -321,6 +321,7 @@ static void ttm_bo_unreserve_bulk(struct kunit
> *test)
>   struct ttm_resource *res1, *res2;
>   struct ttm_device *ttm_dev;
>   struct ttm_place *place;
> + struct dma_resv *resv;
>   uint32_t mem_type = TTM_PL_SYSTEM;
>   unsigned int bo_priority = 0;
>   int e

Re: [RFC PATCH v3 13/21] drm/exec: Rework contended locking

2024-05-22 Thread Thomas Hellström
On Wed, 2024-05-22 at 18:52 +0200, Christian König wrote:
> Am 22.05.24 um 16:32 schrieb Thomas Hellström:
> > On Wed, 2024-05-22 at 07:52 +0200, Christian König wrote:
> > > Am 21.05.24 um 09:16 schrieb Thomas Hellström:
> > > > If contention and backoff occurs during a drm_exec ww
> > > > transaction,
> > > > the contended lock is not locked again until the next ordinary
> > > > attempt to lock a dma_resv lock. However, with the introduction
> > > > of
> > > > drm_exec_trylock(), that doesn't work, since the locking of the
> > > > contended lock needs to be a sleeping lock. Neither can we
> > > > ignore
> > > > locking the contended lock during a trylock since that would
> > > > violate
> > > > at least the ww_mutex annotations.
> > > > 
> > > > So resolve this by actually locking the contended lock during
> > > > drm_exec_retry_on_contention(). However, this introduces a new
> > > > point
> > > > of failure since locking the contended lock may return -EINTR.
> > > > 
> > > > Hence drm_exec_retry_on_contention() must take an error
> > > > parameter
> > > > and
> > > > also return a value indicating success.
> > > After thinking more about that I have to pretty clearly NAK this.
> > >    
> > I thought we were beyond upfront NAKing in the first reply :/
> 
> Well my memory could fail me, but I mentioned concerns on this
> approach 
> before.
> 
> I was a bit annoyed seeing that again. But could as well be that my 
> response never got out or that I'm mixing things up.

I haven't seen it at least. Last discussion on this I saw was
here. I didn't see a follow-up on that.

https://lore.kernel.org/dri-devel/953c157bf69df12d831a781f0f638d93717bb044.ca...@linux.intel.com/


> 
> > > It's an intentional design decision to guarantee that at the
> > > start of
> > > the loop no object is locked.
> > > 
> > > This is because Sima and I wanted to integrate userptr handling
> > > into
> > > drm_exec as well in the long term.
> > First I agree the interface looks worse with this patch.
> > But I thought generic userptr handling was going to end up as a
> > gpuvm
> > helper (without using GEM objects), as we've discussed previously.
> 
> We might be talking past each other. That sounds like SVM, e.g. on 
> demand paging.
> 
> What I mean is pre-faulting during command submission like radeon, 
> amdgpu and i915 do for the userptr handling.

Yes, then we're talking about the same thing.

We discussed this in the thread here, started by Dave.

https://lore.kernel.org/dri-devel/CAPM=9twPgn+fpbkig0Vhjt=cJdHQFbNH_Z=srhszwuvlkha...@mail.gmail.com/

I still think the right place is in drm_gpuvm for this sort of stuff.
And I think that's the concluding argument by Sima as well.

In any case, if the planned drm_exec development is to be a full
execbuf helper, I think we need a capable sub-helper for ONLY the ww
transaction locking as well, with support for the various locking
primitives, in particular if we're going to be able to port i915 ww
transaction locking over. There are more uses of ww transaction
locking than execbuf.

> 
> For that you need to re-start the whole handling, similar to how you
> need to re-start the mutex locking when you detect that the page
> array is stale; the difference is that you are not allowed to hold
> any resv locks while pre-faulting.
> 
> That's why it is a requirement that the drm_exec loop starts without
> any 
> locks held.

But wouldn't you need an outer (userptr) loop and an inner
(ww_transaction) loop for this? Why would we want to re-validate
userptrs on -EDEADLK?

> 
> > Anyway, if there were still to be helpers in drm_exec for some
> > other generic userptr solution, those would need to be done before
> > the ww_acquire_ctx_init(). The contended locking here is done
> > after, so I can't really see how these would clash.
> 
> Yes, that indeed was a problem. The ww_acquire_ctx_init() was 
> intentionally moved into drm_exec_cleanup() to partially prevent that
> issue.
> 
> I haven't fully figured out how to handle everything exactly, but
> at least in principle it can be made to work. With this change here
> it becomes impossible.
> 
> > Still, if we need to come up with another solution, I think it's
> > fair that we clearly sort out why.
> > 
> > > I think we should just document that drm_exec_trylock() can't be
> > > used
> > > to
> > > l

Re: [RFC PATCH v3 15/21] drm/exec: Add a snapshot capability

2024-05-22 Thread Thomas Hellström
On Wed, 2024-05-22 at 15:54 +0200, Thomas Hellström wrote:
> On Wed, 2024-05-22 at 13:27 +0200, Christian König wrote:
> > Am 21.05.24 um 09:16 schrieb Thomas Hellström:
> > > When validating a buffer object for submission, we might need to
> > > lock
> > > a number of objects for eviction to make room for the validation.
> > > 
> > > This makes it pretty likely that validation will eventually
> > > succeed,
> > > since the validating process will eventually hold most dma_resv
> > > locks
> > > of the buffer objects residing in the memory type being validated
> > > for.
> > > 
> > > However, once validation of a single object has succeeded it
> > > might
> > > not
> > > be beneficial to hold on to those locks anymore, and the
> > > validator
> > > would want to drop the locks of all objects taken during
> > > validation.
> > 
> > Exactly avoiding that was one of the goals of developing the
> > drm_exec
> > object.
> > 
> > When objects are unlocked after evicting them it just gives
> > concurrent 
> > operations an opportunity to lock them and re-validate them into
> > the 
> > contended domain.
> > 
> > So why should that approach here be beneficial at all?
> 
> It's a matter of being nice to the rest of the system while *still
> guaranteeing progress*. For each object we're trying to validate, we
> keep on evicting other objects until we make progress, even if that
> means locking all the objects in the domain.
> 
> If we unlocked after each eviction, we couldn't really guarantee
> progress.
> 
> OTOH, a concurrent locker of the object may well be one with higher
> priority (lower ticket number) just wanting to perform a pagefault.
> 
> So it's a tradeoff between just locking other processes out to allow
> us to make one step of progress, and in addition hitting them with
> the big sledgehammer.

I thought I'd also mention that the ideal solution here would be to
have an rw_mutex per manager: ordinary allocations take it in read
mode, evictions take it in write mode. The bad thing is that it sits
in between ww_mutexes, so it would have to be a ww_rw_mutex, which
would probably be too nasty to implement.
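
To make that concrete, a minimal sketch of the idea, assuming a
hypothetical per-manager lock (the evict_rwsem name is made up here,
and as noted a plain rw_semaphore can't actually nest inside a ww
transaction):

	/* Hypothetical only: evict_rwsem is not part of any posted series. */

	/* Ordinary allocation path takes the manager lock shared... */
	down_read(&man->evict_rwsem);
	ret = ttm_resource_alloc(bo, place, &res);
	up_read(&man->evict_rwsem);

	/* ...while the eviction path takes it exclusive, so an evicting
	 * process is never raced by concurrent allocators in the same
	 * manager. */
	down_write(&man->evict_rwsem);
	/* walk the LRU and evict until the allocation can succeed */
	up_write(&man->evict_rwsem);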

/Thomas

> 
> /Thomas
> 
> > 
> > Regards,
> > Christian.
> > 
> > > 
> > > Introduce a drm_exec snapshot functionality that can be used to
> > > record the locks held at a certain time, and a restore
> > > functionality
> > > that restores the drm_exec state to the snapshot by dropping all
> > > locks.
> > > 
> > > Snapshots can be nested if needed.
> > > 
> > > Cc: Christian König 
> > > Cc: Somalapuram Amaranath 
> > > Cc: Matthew Brost 
> > > Cc: 
> > > Signed-off-by: Thomas Hellström
> > > 
> > > ---
> > >   drivers/gpu/drm/drm_exec.c | 55
> > > +-
> > >   include/drm/drm_exec.h | 23 +++-
> > >   2 files changed, 76 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/drm_exec.c
> > > b/drivers/gpu/drm/drm_exec.c
> > > index 1383680ffa4a..9eea5d0d3a98 100644
> > > --- a/drivers/gpu/drm/drm_exec.c
> > > +++ b/drivers/gpu/drm/drm_exec.c
> > > @@ -57,6 +57,7 @@ static void drm_exec_unlock_all(struct drm_exec
> > > *exec)
> > >   struct drm_gem_object *obj;
> > >   unsigned long index;
> > >   
> > > + WARN_ON(exec->snap);
> > > drm_exec_for_each_locked_object_reverse(exec, index, obj) {
> > >   dma_resv_unlock(obj->resv);
> > >   drm_gem_object_put(obj);
> > > @@ -90,6 +91,7 @@ void drm_exec_init(struct drm_exec *exec, u32
> > > flags, unsigned nr)
> > >   exec->num_objects = 0;
> > >   exec->contended = DRM_EXEC_DUMMY;
> > >   exec->prelocked = NULL;
> > > + exec->snap = NULL;
> > >   }
> > >   EXPORT_SYMBOL(drm_exec_init);
> > >   
> > > @@ -301,7 +303,6 @@ int drm_exec_lock_obj(struct drm_exec *exec,
> > > struct drm_gem_object *obj)
> > >   goto error_unlock;
> > >   
> > >   return 0;
> > > -
> > >   error_unlock:
> > >   dma_resv_unlock(obj->resv);
> > >   return ret;
> > > @@ -395,5 +396,57 @@ int drm_exec_prepare_array(struct drm_exec
> > > 

Re: [RFC PATCH v3 13/21] drm/exec: Rework contended locking

2024-05-22 Thread Thomas Hellström
On Wed, 2024-05-22 at 07:52 +0200, Christian König wrote:
> Am 21.05.24 um 09:16 schrieb Thomas Hellström:
> > If contention and backoff occur during a drm_exec ww transaction,
> > the contended lock is not locked again until the next ordinary
> > attempt to lock a dma_resv lock. However, with the introduction of
> > drm_exec_trylock(), that doesn't work, since the locking of the
> > contended lock needs to be a sleeping lock. Neither can we ignore
> > locking the contended lock during a trylock since that would
> > violate
> > at least the ww_mutex annotations.
> > 
> > So resolve this by actually locking the contended lock during
> > drm_exec_retry_on_contention(). However, this introduces a new
> > point
> > of failure since locking the contended lock may return -EINTR.
> > 
> > Hence drm_exec_retry_on_contention() must take an error parameter
> > and
> > also return a value indicating success.
> 
> After thinking more about that I have to pretty clearly NAK this.
>   
I thought we were beyond upfront NAKing in the first reply :/

> It's an intentional design decision to guarantee that at the start of
> the loop no object is locked.
> 
> This is because Sima and I wanted to integrate userptr handling into 
> drm_exec as well in the long term.

First, I agree the interface looks worse with this patch.
But I thought generic userptr handling was going to end up as a gpuvm
helper (without using GEM objects), as we've discussed previously.
Anyway, if there were still to be helpers in drm_exec for some other
generic userptr solution, those would need to be done before the
ww_acquire_ctx_init(). The contended locking here is done after, so I
can't really see how these would clash.

Still, if we need to come up with another solution, I think it's fair
that we clearly sort out why.

> I think we should just document that drm_exec_trylock() can't be used
> to 
> lock the first BO in the loop and explicitly WARN if that's the case.

Unfortunately that's not sufficient for the general use-case. If we
want to keep the ttm_bo_vm approach of dropping the mmap lock when
there is contention on the bo resv, we need to be able to trylock on
the first lock. Also, bo creation currently uses trylock but might be
able to use a sleeping lock there. But if that sleeping lock triggers
an -EDEADLK (DEBUG_WW_MUTEX_SLOWPATH), we end up in the weird
situation of referencing, as the contending object, an object that
was never fully created.

So the only really working alternative solution I can see is that
drm_exec_trylock simply fails if there is a contended lock, and we'd
need to live with the weird bo creation situation described above.
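
For reference, with this patch a locking loop would look roughly as
follows (a sketch only, inferred from the hunks below, so the final
form may differ in details):

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
	drm_exec_until_all_locked(&exec) {
		ret = drm_exec_lock_obj(&exec, obj);
		/* Now takes the previous error and returns one: on
		 * contention it locks the contended lock, which may
		 * itself fail with -EINTR. */
		ret = drm_exec_retry_on_contention(&exec, ret);
		if (ret)
			break;
	}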

/Thomas

> 
> Regards,
> Christian.
> 
> > 
> > Cc: Christian König 
> > Cc: Somalapuram Amaranath 
> > Cc: Matthew Brost 
> > Cc: 
> > Signed-off-by: Thomas Hellström 
> > ---
> >   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 16 -
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c    |  6 ++--
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |  4 +--
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  8 ++---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  8 ++---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c |  4 +--
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c  |  8 ++---
> >   drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  2 +-
> >   drivers/gpu/drm/drm_exec.c    | 35
> > ++-
> >   drivers/gpu/drm/drm_gpuvm.c   |  8 ++---
> >   drivers/gpu/drm/imagination/pvr_job.c |  2 +-
> >   drivers/gpu/drm/msm/msm_gem_submit.c  |  2 +-
> >   drivers/gpu/drm/nouveau/nouveau_uvmm.c    |  2 +-
> >   drivers/gpu/drm/tests/drm_exec_test.c | 12 +++
> >   drivers/gpu/drm/xe/xe_gt_pagefault.c  |  4 +--
> >   drivers/gpu/drm/xe/xe_vm.c    | 10 +++---
> >   include/drm/drm_exec.h    | 23 +---
> >   17 files changed, 92 insertions(+), 62 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > index e4d4e55c08ad..4a08a692aa1f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > @@ -1152,12 +1152,12 @@ static int reserve_bo_and_vm(struct kgd_mem
> > *mem,
> >     drm_exec_init(>exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> >     drm_exec_until_all_locked(>exec) {
> >     ret = amdgpu_vm_lock_pd(vm, >exec, 2);
> > -   drm_exec_retry_on_contention(>exec);
> > +   ret = drm_exec_retry_

Re: [RFC PATCH v3 15/21] drm/exec: Add a snapshot capability

2024-05-22 Thread Thomas Hellström
On Wed, 2024-05-22 at 13:27 +0200, Christian König wrote:
> Am 21.05.24 um 09:16 schrieb Thomas Hellström:
> > When validating a buffer object for submission, we might need to
> > lock
> > a number of objects for eviction to make room for the validation.
> > 
> > This makes it pretty likely that validation will eventually
> > succeed,
> > since the validating process will eventually hold most dma_resv
> > locks
> > of the buffer objects residing in the memory type being validated
> > for.
> > 
> > However, once validation of a single object has succeeded it might
> > not
> > be beneficial to hold on to those locks anymore, and the validator
> > would want to drop the locks of all objects taken during
> > validation.
> 
> Exactly avoiding that was one of the goals of developing the drm_exec
> object.
> 
> When objects are unlocked after evicting them it just gives
> concurrent 
> operations an opportunity to lock them and re-validate them into the 
> contended domain.
> 
> So why should that approach here be beneficial at all?

It's a matter of being nice to the rest of the system while *still
guaranteeing progress*. For each object we're trying to validate, we
keep on evicting other objects until we make progress, even if that
means locking all the objects in the domain.

If we unlocked after each eviction, we couldn't really guarantee
progress.

OTOH, a concurrent locker of the object may well be one with higher
priority (lower ticket number) just wanting to perform a pagefault.

So it's a tradeoff between just locking other processes out to allow
us to make one step of progress, and in addition hitting them with
the big sledgehammer.
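
To illustrate the intended use, roughly (a sketch only; the name of
the snapshot-taking function and the validate helper are hypothetical
here, since only drm_exec_restore() is visible in the patch below):

	struct drm_exec_snapshot snap;

	/* Record the locks held before validating this object. */
	drm_exec_take_snapshot(&exec, &snap);	/* hypothetical name */

	/* May lock and evict a number of objects to make room. */
	ret = validate_one_object(&exec, bo);	/* hypothetical helper */

	/* On success, drop only the locks taken since the snapshot. */
	drm_exec_restore(&exec, &snap);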

/Thomas

> 
> Regards,
> Christian.
> 
> > 
> > Introduce a drm_exec snapshot functionality that can be used to
> > record the locks held at a certain time, and a restore
> > functionality
> > that restores the drm_exec state to the snapshot by dropping all
> > locks.
> > 
> > Snapshots can be nested if needed.
> > 
> > Cc: Christian König 
> > Cc: Somalapuram Amaranath 
> > Cc: Matthew Brost 
> > Cc: 
> > Signed-off-by: Thomas Hellström 
> > ---
> >   drivers/gpu/drm/drm_exec.c | 55
> > +-
> >   include/drm/drm_exec.h | 23 +++-
> >   2 files changed, 76 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/drm_exec.c
> > b/drivers/gpu/drm/drm_exec.c
> > index 1383680ffa4a..9eea5d0d3a98 100644
> > --- a/drivers/gpu/drm/drm_exec.c
> > +++ b/drivers/gpu/drm/drm_exec.c
> > @@ -57,6 +57,7 @@ static void drm_exec_unlock_all(struct drm_exec
> > *exec)
> >     struct drm_gem_object *obj;
> >     unsigned long index;
> >   
> > +   WARN_ON(exec->snap);
> >     drm_exec_for_each_locked_object_reverse(exec, index, obj) {
> >     dma_resv_unlock(obj->resv);
> >     drm_gem_object_put(obj);
> > @@ -90,6 +91,7 @@ void drm_exec_init(struct drm_exec *exec, u32
> > flags, unsigned nr)
> >     exec->num_objects = 0;
> >     exec->contended = DRM_EXEC_DUMMY;
> >     exec->prelocked = NULL;
> > +   exec->snap = NULL;
> >   }
> >   EXPORT_SYMBOL(drm_exec_init);
> >   
> > @@ -301,7 +303,6 @@ int drm_exec_lock_obj(struct drm_exec *exec,
> > struct drm_gem_object *obj)
> >     goto error_unlock;
> >   
> >     return 0;
> > -
> >   error_unlock:
> >     dma_resv_unlock(obj->resv);
> >     return ret;
> > @@ -395,5 +396,57 @@ int drm_exec_prepare_array(struct drm_exec
> > *exec,
> >   }
> >   EXPORT_SYMBOL(drm_exec_prepare_array);
> >   
> > +/**
> > + * drm_exec_restore() - Restore the drm_exec state to the point of
> > a snapshot.
> > + * @exec: The drm_exec object with the state.
> > + * @snap: The snapshot state.
> > + *
> > + * Restores the drm_exec object by means of unlocking and dropping
> > references
> > + * to objects locked after the snapshot.
> > + */
> > +void drm_exec_restore(struct drm_exec *exec, struct
> > drm_exec_snapshot *snap)
> > +{
> > +   struct drm_gem_object *obj;
> > +   unsigned int index;
> > +
> > +   exec->snap = snap->saved_snap;
> > +
> > +   drm_exec_for_each_locked_object_reverse(exec, index, obj) {
> > +   if (index + 1 == snap->num_locked)
> > +   break;
> > +
> > +   dma_resv_unlock(obj->resv);
> > +   drm_gem_object_put(obj);
> > +   exec

Re: [RFC PATCH v3 16/21] drm/exec: Introduce an evict mode

2024-05-22 Thread Thomas Hellström
On Wed, 2024-05-22 at 15:28 +0200, Christian König wrote:
> Am 21.05.24 um 09:16 schrieb Thomas Hellström:
> > Locking for eviction is in some way different from locking for
> > submission:
> > 
> > 1) We can't lock objects that are already locked for submission,
> > hence DRM_EXEC_IGNORE_DUPLICATES must be unset.
> > 2) We must be able to re-lock objects locked for eviction,
> > either for submission or for yet another eviction, in
> > particular objects sharing a single resv must be considered.
> 
> Yeah, I was already thinking about that as well.
> 
> My idea so far was to have a separate function for locking eviction
> BOs. 
> This function would then use trylock or blocking depending on some
> setting.

Downstream i915 also has a separate locking function for this. I'm fine
with that as well. Probably the most sane choice.
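
Something along these lines, as a sketch only (the function name and
the trylock switch are made up to illustrate; a real version would
also need to handle resvs already locked for submission, per points
1-2 above):

	static int drm_exec_lock_obj_evict(struct drm_exec *exec,
					   struct drm_gem_object *obj,
					   bool trylock)
	{
		/* A real helper would also track the trylocked resv in
		 * @exec so that unlock_all can release it. */
		if (trylock)
			return dma_resv_trylock(obj->resv) ? 0 : -EBUSY;

		return drm_exec_lock_obj(exec, obj);
	}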


> 
> > 3) There is no point in keeping a contending object after the
> > transaction restart. We don't know whether we actually want to use
> > it again.
> 
> Well that isn't true as far as I know.
> 
> If we don't use trylock we still need to lock the object after
> rollback 
> to make sure that we waited for it to become available.

Yes, the transaction restart mentioned above is *after* the relaxation,
so the rollback becomes:

unlock_all
lock_contending_lock
unlock_contending_lock
drop_contending_lock
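
In code, the rollback would then be roughly (a hypothetical sketch in
terms of the drm_exec fields from this series; error handling elided):

	static void drm_exec_relax(struct drm_exec *exec)	/* made-up name */
	{
		drm_exec_unlock_all(exec);

		/* Wait until the contended lock becomes available... */
		dma_resv_lock(exec->contended->resv, &exec->ticket);
		/* ...then release it again instead of keeping it held. */
		dma_resv_unlock(exec->contended->resv);

		drm_gem_object_put(exec->contended);
		exec->contended = NULL;
	}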


/Thomas


> 
> Regards,
> Christian.
> 
> > So introduce a drm_exec evict mode, and for now, instead of
> > explicitly setting it using a function call or implementing
> > separate locking functions that use evict mode, assume evict mode
> > if there is a snapshot registered. This can easily be changed later.
> > 
> > To keep track of resvs locked for eviction, use a pointer set
> > implemented by an xarray. This is probably not the most efficient
> > data structure, but it is used as an easy-to-implement first
> > approach.
> > If the set is empty (evict mode never used), the performance-
> > and memory usage impact will be very small.
> > 
> > TODO: Probably want to implement the set using an open addressing
> > hash table.
> > 
> > Cc: Christian König 
> > Cc: Somalapuram Amaranath 
> > Cc: Matthew Brost 
> > Cc: 
> > Signed-off-by: Thomas Hellström 
> > ---
> >   drivers/gpu/drm/drm_exec.c | 77
> > ++
> >   include/drm/drm_exec.h | 15 
> >   2 files changed, 85 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/drm_exec.c
> > b/drivers/gpu/drm/drm_exec.c
> > index 9eea5d0d3a98..ea79d96f5439 100644
> > --- a/drivers/gpu/drm/drm_exec.c
> > +++ b/drivers/gpu/drm/drm_exec.c
> > @@ -65,6 +65,10 @@ static void drm_exec_unlock_all(struct drm_exec
> > *exec)
> >   
> >     drm_gem_object_put(exec->prelocked);
> >     exec->prelocked = NULL;
> > +
> > +   /* garbage collect */
> > +   xa_destroy(&exec->resv_set);
> > +   xa_init(&exec->resv_set);
> >   }
> >   
> >   /**
> > @@ -92,6 +96,8 @@ void drm_exec_init(struct drm_exec *exec, u32
> > flags, unsigned nr)
> >     exec->contended = DRM_EXEC_DUMMY;
> >     exec->prelocked = NULL;
> >     exec->snap = NULL;
> > +   exec->drop_contended = false;
> > +   xa_init(&exec->resv_set);
> >   }
> >   EXPORT_SYMBOL(drm_exec_init);
> >   
> > @@ -110,6 +116,7 @@ void drm_exec_fini(struct drm_exec *exec)
> >     drm_gem_object_put(exec->contended);
> >     ww_acquire_fini(>ticket);
> >     }
> > +   xa_destroy(&exec->resv_set);
> >   }
> >   EXPORT_SYMBOL(drm_exec_fini);
> >   
> > @@ -139,6 +146,30 @@ bool drm_exec_cleanup(struct drm_exec *exec)
> >   }
> >   EXPORT_SYMBOL(drm_exec_cleanup);
> >   
> > +static unsigned long drm_exec_resv_to_key(const struct dma_resv
> > *resv)
> > +{
> > +   return (unsigned long)resv / __alignof__(typeof(*resv));
> > +}
> > +
> > +static void
> > +drm_exec_resv_set_erase(struct drm_exec *exec, unsigned long key)
> > +{
> > +   if (xa_load(&exec->resv_set, key))
> > +   xa_erase(&exec->resv_set, key);
> > +}
> > +
> > +static bool drm_exec_in_evict_mode(struct drm_exec *exec)
> > +{
> > +   return !!exec->snap;
> > +}
> > +
> > +static void drm_exec_set_evict_mode(struct drm_exec *exec,
> > +       struct drm_exec_snapshot
> > *snap)
> > +{
> > +   exec->snap = snap;
> > +   exec->flags &= ~DRM_EXEC_IGNORE_DUPLICATES;
> > +}
> > +
&

Re: [PATCH v3 00/21] TTM shrinker helpers and xe buffer object shrinker

2024-05-21 Thread Thomas Hellström
Hi, all

On Tue, 2024-05-21 at 09:16 +0200, Thomas Hellström wrote:
> This series implements TTM shrinker / eviction helpers and an xe bo
> shrinker. It builds on two previous series, *and obsoletes these*.
> First
> 
> https://www.mail-archive.com/dri-devel@lists.freedesktop.org/msg484425.html
> 
> Second the previous TTM shrinker series
> 
> https://lore.kernel.org/linux-mm/b7491378-defd-4f1c-31e2-29e4c77e2...@amd.com/T/
> 
> Where the comment about layering
> https://lore.kernel.org/linux-mm/b7491378-defd-4f1c-31e2-29e4c77e2...@amd.com/T/#ma918844aa8a6efe8768fdcda0c6590d5c93850c9
> 
> is now addressed, and this version also implements shmem objects for
> backup rather than direct swap-cache insertions, which were used in
> the previous series. It turns out that with per-page backup /
> shrinking, shmem objects appear to work just as well as direct
> swap-cache insertions, with the added benefit that the mechanism
> introduced in the previous TTM shrinker series to avoid running out
> of swap entries isn't really needed.
> 
> Patches 1-4 implement restartable LRU list iteration.
> 
> Patch 5 implements a LRU walker + resv locking helper
> 
> Patch 6 moves TTM swapping over to the walker.
> 
> Patch 7 moves TTM eviction over to the walker.
> 
> Patch 8 could in theory be skipped but introduces a possibility to
> easily add or test multiple backup backends, like the direct
> swap-cache insertion or even files on fast dedicated nvme storage,
> for example.
> 
> Patch 9 introduces helpers in the ttm_pool code for page-by-page
> shrinking
> and recovery. It avoids having to temporarily allocate a huge amount
> of
> memory to be able to shrink a buffer object. It also introduces the
> possibility to immediately write-back pages if needed, since that
> tends
> to be a bit delayed when left to kswapd.
> 
> Patch 10 Adds a simple error injection to the above code to help
> increase
> test coverage.
> 
> Patch 11 Implements an xe bo shrinker and a common helper in TTM for
> shrinking.
> 
> Patches 12-21 are really a separate POC series, introducing
> drm_exec locking
> in TTM. The patches touch both drm_exec and dma-buf and are for now
> marked as
> an RFC:
> 
> Patch 12 Introduces dma_resv_trylock_ctx.
> 
> Patches 13-14 deal with introducing drm_exec_trylock.
> 
> Patch 15 adds a snapshot capability to drm_exec.
> 
> Patch 16 adds an evict mode locking capability to drm_exec
> 
> Patch 17 converts the LRU + locking walker to drm_exec.
> 
> Patch 18 converts TTM vm to use drm_exec.
> 
> Patch 19 converts the xe fault handler to drm_exec.
> 
> Patch 20 converts bo initialization locking to drm_exec
> 
> Patch 21 introduces drm_exec locking around some of the
> bo validation callsites in xe.
> 
> v2:
> - Squash obsolete revision history in the patch commit messages.
> - Fix a couple of review comments by Christian
> - Don't store the mem_type in the TTM managers but in the
>   resource cursor.
> - Rename introduced TTM *back_up* function names to *backup*
> - Add ttm pool recovery fault injection.
> - Shrinker xe kunit test
> - Various bugfixes
> 
> v3:
> - Address some review comments from Matthew Brost and Christian
> König.
> - Use the restartable LRU walk for TTM swapping and eviction.
> - Provide a POC drm_exec locking implementation for exhaustive
>   eviction. (Christian König).
> 
> Cc: Somalapuram Amaranath 
> Cc: Christian König 
> Cc: Matthew Brost 
> Cc: 

Now, with a POC available for how the drm_exec locking could be done
in TTM (the RFC part), I think the best approach is to get patches
1-11 reviewed and pushed; we could then settle on a solution for the
drm_exec part as a follow-up. Undoubtedly there are a couple of things
that need discussion there, in particular the resv_set, the snapshot
and the trylock memory allocation.
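
For anyone jumping in, the restartable iteration from patches 1-4
looks roughly like this at a call site (a sketch; the exact cursor
init/fini names differ a bit between revisions of the series):

	struct ttm_resource_cursor cursor;
	struct ttm_resource *res;

	spin_lock(&bdev->lru_lock);
	ttm_resource_cursor_init(&cursor, man);
	ttm_resource_manager_for_each_res(&cursor, res) {
		/* The cursor keeps a hitch on the LRU, so the lru_lock
		 * can be dropped here and the walk restarted where it
		 * left off. */
	}
	ttm_resource_cursor_fini(&cursor);
	spin_unlock(&bdev->lru_lock);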

/Thomas



[RFC PATCH v3 21/21] drm/xe: Initial support for drm exec locking during validate

2024-05-21 Thread Thomas Hellström
Initial stab at converting xe_bo validation to drm_exec locking
where it matters most (low-hanging fruit). For a couple of call
sites, as well as for bo allocation, passing the drm_exec object
down the call chain may turn out a bit tricky.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/xe/display/xe_fb_pin.c | 2 +-
 drivers/gpu/drm/xe/tests/xe_bo.c   | 6 +++---
 drivers/gpu/drm/xe/tests/xe_dma_buf.c  | 4 ++--
 drivers/gpu/drm/xe/tests/xe_migrate.c  | 2 +-
 drivers/gpu/drm/xe/xe_bo.c | 8 +---
 drivers/gpu/drm/xe/xe_bo.h | 4 +++-
 drivers/gpu/drm/xe/xe_dma_buf.c| 2 +-
 drivers/gpu/drm/xe/xe_ggtt.c   | 2 +-
 drivers/gpu/drm/xe/xe_gt_pagefault.c   | 2 +-
 drivers/gpu/drm/xe/xe_vm.c | 4 ++--
 10 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c 
b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 36e15c4961c1..85f37dd7ecb1 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -289,7 +289,7 @@ static struct i915_vma *__xe_pin_fb_vma(const struct 
intel_framebuffer *fb,
if (IS_DGFX(xe))
ret = xe_bo_migrate(bo, XE_PL_VRAM0);
else
-   ret = xe_bo_validate(bo, NULL, true);
+   ret = xe_bo_validate(bo, NULL, true, NULL);
if (!ret)
		ttm_bo_pin(&bo->ttm);
	ttm_bo_unreserve(&bo->ttm);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 7576d362020f..410579f75a39 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -30,7 +30,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct 
xe_bo *bo,
u32 offset;
 
/* Move bo to VRAM if not already there. */
-   ret = xe_bo_validate(bo, NULL, false);
+   ret = xe_bo_validate(bo, NULL, false, NULL);
if (ret) {
KUNIT_FAIL(test, "Failed to validate bo.\n");
return ret;
@@ -276,7 +276,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct 
xe_tile *tile, struc
if (i) {
		down_read(&vm->lock);
xe_vm_lock(vm, false);
-   err = xe_bo_validate(bo, bo->vm, false);
+   err = xe_bo_validate(bo, bo->vm, false, NULL);
xe_vm_unlock(vm);
		up_read(&vm->lock);
if (err) {
@@ -285,7 +285,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct 
xe_tile *tile, struc
goto cleanup_all;
}
xe_bo_lock(external, false);
-   err = xe_bo_validate(external, NULL, false);
+   err = xe_bo_validate(external, NULL, false, NULL);
xe_bo_unlock(external);
if (err) {
KUNIT_FAIL(test, "external bo valid err=%pe\n",
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c 
b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index e7f9b531c465..ef88b4dd184c 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -81,7 +81,7 @@ static void check_residency(struct kunit *test, struct xe_bo 
*exported,
}
 
/* Re-validate the importer. This should move also exporter in. */
-   ret = xe_bo_validate(imported, NULL, false);
+   ret = xe_bo_validate(imported, NULL, false, NULL);
if (ret) {
if (ret != -EINTR && ret != -ERESTARTSYS)
KUNIT_FAIL(test, "Validating importer failed with 
err=%d.\n",
@@ -157,7 +157,7 @@ static void xe_test_dmabuf_import_same_driver(struct 
xe_device *xe)
 
/* Is everything where we expect it to be? */
xe_bo_lock(import_bo, false);
-   err = xe_bo_validate(import_bo, NULL, false);
+   err = xe_bo_validate(import_bo, NULL, false, NULL);
 
/* Pinning in VRAM is not allowed. */
if (!is_dynamic(params) &&
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c 
b/drivers/gpu/drm/xe/tests/xe_migrate.c
index b6e7f80c3774..0feb99d3ef7d 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -90,7 +90,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
return;
}
 
-   err = xe_bo_validate(remote, NULL, false);
+   err = xe_bo_validate(remote, NULL, false, NULL);
if (err) {
KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n",
   str, err);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 27d7d36401b5..f33120f3a82

[RFC PATCH v3 19/21] drm/xe: Use drm_exec for fault locking

2024-05-21 Thread Thomas Hellström
Similar to how TTM vm does this, convert the drm/xe fault
handler to use drm_exec locking.

Cc: Christian König 
Cc: Somalapuram Amaranath 
Cc: Matthew Brost 
Cc: 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/xe/xe_bo.c | 38 +++---
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 3c56858e0751..27d7d36401b5 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1217,29 +1217,37 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
struct xe_device *xe = to_xe_device(ddev);
struct xe_bo *bo = ttm_to_xe_bo(tbo);
bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
+   struct drm_exec exec;
vm_fault_t ret;
-   int idx;
+   int idx, err;
 
if (needs_rpm)
xe_pm_runtime_get(xe);
 
-   ret = ttm_bo_vm_reserve(tbo, vmf, NULL);
-   if (ret)
-   goto out;
+   drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 16);
+   drm_exec_until_all_locked(&exec) {
+   ret = ttm_bo_vm_reserve(tbo, vmf, &exec);
+   err = drm_exec_retry_on_contention(&exec, 0);
+   if (err)
+   ret = VM_FAULT_NOPAGE;
+   if (ret)
+   goto out;
 
-   if (drm_dev_enter(ddev, )) {
-   trace_xe_bo_cpu_fault(bo);
+   if (drm_dev_enter(ddev, )) {
+   trace_xe_bo_cpu_fault(bo);
 
-   ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
-  TTM_BO_VM_NUM_PREFAULT,
-  NULL);
-   drm_dev_exit(idx);
-   } else {
-   ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+   ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+  TTM_BO_VM_NUM_PREFAULT,
+  &exec);
+   drm_dev_exit(idx);
+   err = drm_exec_retry_on_contention(&exec, 0);
+   if (err)
+   ret = VM_FAULT_NOPAGE;
+   } else {
+   ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+   }
}
 
-   if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
-   goto out;
/*
 * ttm_bo_vm_reserve() already has dma_resv_lock.
 */
@@ -1250,8 +1258,8 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
mutex_unlock(&xe->mem_access.vram_userfault.lock);
}
 
-   dma_resv_unlock(tbo->base.resv);
 out:
+   drm_exec_fini(&exec);
if (needs_rpm)
xe_pm_runtime_put(xe);
 
-- 
2.44.0


