Provide a generic LRU walker in TTM, in the spirit of drm_gem_lru_scan()
but building on the restartable TTM LRU functionality.

The LRU walker optionally supports locking objects as part of
a ww mutex locking transaction, to mimic to some extent the
current functionality in TTM. However, any -EDEADLK return
is converted to -ENOSPC and then to -ENOMEM before reaching
the driver, so that the driver will need to back off and possibly
retry without being able to keep the ticket.

v3:
- Move the helper to core ttm.
- Remove the drm_exec usage from it for now, it will be
  reintroduced later in the series.
v4:
- Handle the -EALREADY case if ticketlocking.
v6:
- Some cleanup and added code comments (Matthew Brost)
- Clarified the ticketlock in the commit message (Matthew Brost)
v7:
- Use s64 rather than long for the target and progress
  (Christian König)
- Update documentation to not encourage using pages as a
  progress measure. (Christian König)
- Remove cond_resched(). (Christian König)

Cc: Christian König <christian.koe...@amd.com>
Cc: Somalapuram Amaranath <amaranath.somalapu...@amd.com>
Cc: Matthew Brost <matthew.br...@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellst...@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.br...@intel.com> #v6
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 151 ++++++++++++++++++++++++++++++
 include/drm/ttm/ttm_bo.h          |  35 +++++++
 2 files changed, 186 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 0b3f4267130c..c2759add58f5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -768,3 +768,154 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
        ttm_tt_destroy(bo->bdev, ttm);
        return ret;
 }
+
+static bool ttm_lru_walk_trylock(struct ttm_lru_walk *walk,
+                                struct ttm_buffer_object *bo,
+                                bool *needs_unlock)
+{
+       struct ttm_operation_ctx *ctx = walk->ctx;
+
+       *needs_unlock = false;
+       /* Trylock succeeded: report that the lock must be dropped again. */
+       if (dma_resv_trylock(bo->base.resv)) {
+               *needs_unlock = true;
+               return true;
+       }
+       /* Resv shared with the ctx and eviction allowed: treat as locked. */
+       if (bo->base.resv == ctx->resv && ctx->allow_res_evict) {
+               dma_resv_assert_held(bo->base.resv);
+               return true;
+       }
+
+       return false;
+}
+
+static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
+                                  struct ttm_buffer_object *bo,
+                                  bool *needs_unlock)
+{
+       struct dma_resv *resv = bo->base.resv;
+       int ret;
+
+       if (walk->ctx->interruptible)
+               ret = dma_resv_lock_interruptible(resv, walk->ticket);
+       else
+               ret = dma_resv_lock(resv, walk->ticket);
+
+       if (!ret) {
+               *needs_unlock = true;
+               /*
+                * Only a single ticketlock per walk. Ticketlocks are prone
+                * to return -EDEADLK causing the eviction to fail, so
+                * after waiting for the ticketlock, clear the ticket; the
+                * walk then skips any further bo it cannot trylock.
+                */
+               walk->ticket = NULL;
+       } else if (ret == -EDEADLK) {
+               /* Report -ENOSPC: the caller must exit the ww transaction. */
+               ret = -ENOSPC;
+       }
+
+       return ret;
+}
+
+static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
+{
+       if (locked) /* nothing to drop unless the walk took the lock itself */
+               dma_resv_unlock(bo->base.resv);
+}
+
+/**
+ * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
+ * valid items.
+ * @walk: Describes the walk and the actions taken.
+ * @bdev: The TTM device.
+ * @man: The struct ttm_resource manager whose LRU lists we're walking.
+ * @target: The end condition for the walk.
+ *
+ * The LRU lists of @man are walked, and for each struct ttm_resource
+ * encountered, the corresponding ttm_buffer_object is locked, a reference is
+ * taken on it and the LRU lock is dropped. The LRU lock may be dropped before
+ * locking and, in that case, it's verified after locking that the buffer
+ * object still has a resource of the same memory type as before.
+ *
+ * With a locked object, the actions indicated by @walk->ops->process_bo are
+ * performed, and after that, the bo is unlocked, the refcount dropped and the
+ * next struct ttm_resource is processed. Here, the walker relies on
+ * TTM's restartable LRU list implementation.
+ *
+ * Typically @walk->ops->process_bo() would return the number of pages evicted,
+ * swapped or shrunken, so that when the total reaches @target, or when the LRU
+ * list has been walked in full, iteration is terminated. It's also terminated
+ * on error other than -EBUSY or -EALREADY. Note that the definition of @target
+ * is up to the caller; it may mean something other than a number of pages.
+ *
+ * Note that, given the way dma_resv individualization is done, locking needs
+ * to be done either with the LRU lock held (trylocking only) or with a
+ * reference on the object.
+ *
+ * Return: The progress made towards target or negative error code on error.
+ */
+s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
+                          struct ttm_resource_manager *man, s64 target)
+{
+       struct ttm_resource_cursor cursor;
+       struct ttm_resource *res;
+       s64 progress = 0;
+       s64 lret;
+
+       spin_lock(&bdev->lru_lock);
+       ttm_resource_manager_for_each_res(man, &cursor, res) {
+               struct ttm_buffer_object *bo = res->bo;
+               bool bo_needs_unlock = false;
+               bool bo_locked = false;
+               int mem_type;
+
+               /*
+                * Attempt a trylock before taking a reference on the bo,
+                * since if we do it the other way around, and the trylock fails,
+                * we need to drop the lru lock to put the bo.
+                */
+               if (ttm_lru_walk_trylock(walk, bo, &bo_needs_unlock))
+                       bo_locked = true;
+               else if (!walk->ticket || walk->ctx->no_wait_gpu ||
+                        walk->trylock_only)
+                       continue;
+
+               if (!ttm_bo_get_unless_zero(bo)) {
+                       ttm_lru_walk_unlock(bo, bo_needs_unlock);
+                       continue;
+               }
+
+               mem_type = res->mem_type;
+               spin_unlock(&bdev->lru_lock);
+
+               lret = 0;
+               if (!bo_locked)
+                       lret = ttm_lru_walk_ticketlock(walk, bo, &bo_needs_unlock);
+
+               /*
+                * Note that in between the release of the lru lock and the
+                * ticketlock, the bo may have switched resource,
+                * and also memory type, since the resource may have been
+                * freed and allocated again with a different memory type.
+                * In that case, just skip it.
+                */
+               if (!lret && bo->resource && bo->resource->mem_type == mem_type)
+                       lret = walk->ops->process_bo(walk, bo);
+
+               ttm_lru_walk_unlock(bo, bo_needs_unlock);
+               ttm_bo_put(bo);
+               if (lret == -EBUSY || lret == -EALREADY)
+                       lret = 0;
+               progress = (lret < 0) ? lret : progress + lret;
+
+               spin_lock(&bdev->lru_lock);
+               if (progress < 0 || progress >= target)
+                       break;
+       }
+       ttm_resource_cursor_fini_locked(&cursor);
+       spin_unlock(&bdev->lru_lock);
+
+       return progress;
+}
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index ef0f52f56ebc..21fa9d5964ec 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -194,6 +194,41 @@ struct ttm_operation_ctx {
        uint64_t bytes_moved;
 };
 
+struct ttm_lru_walk;
+
+/** struct ttm_lru_walk_ops - Operations for a LRU walk. */
+struct ttm_lru_walk_ops {
+       /**
+        * process_bo - Process this bo.
+        * @walk: struct ttm_lru_walk describing the walk.
+        * @bo: A locked and referenced buffer object.
+        *
+        * Return: Negative error code on error, user-defined positive value
+        * (typically, but not always, size of the processed bo) on success.
+        * On success, the returned values are summed by the walk and the
+        * walk exits when its target is met.
+        * 0 also indicates success; -EBUSY or -EALREADY means the bo was skipped.
+        */
+       s64 (*process_bo)(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo);
+};
+
+/**
+ * struct ttm_lru_walk - Structure describing a LRU walk.
+ */
+struct ttm_lru_walk {
+       /** @ops: Pointer to the ops structure. */
+       const struct ttm_lru_walk_ops *ops;
+       /** @ctx: Pointer to the struct ttm_operation_ctx. */
+       struct ttm_operation_ctx *ctx;
+       /** @ticket: The struct ww_acquire_ctx if any. Reset after a ticketlock. */
+       struct ww_acquire_ctx *ticket;
+       /** @trylock_only: Only use trylock for locking. */
+       bool trylock_only;
+};
+
+s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
+                          struct ttm_resource_manager *man, s64 target);
+
 /**
  * ttm_bo_get - reference a struct ttm_buffer_object
  *
-- 
2.44.0

Reply via email to