Hi Boris,

On 1/9/26 13:08, Boris Brezillon wrote:
From: Akash Goel <[email protected]>

This implementation is loosely based on the MSM shrinker, and it relies
on the drm_gpuvm eviction/validation infrastructure.

Right now we only support swapout/eviction, but we could add an extra
flag to specify when buffer content doesn't need to be preserved to
avoid the swapout/swapin dance.

Locking is a bit of a nightmare, but using _trylock() all the way in
the reclaim path seems to make lockdep happy. And yes, we might be
missing opportunities to reclaim when the system is under heavy GPU
load/heavy memory pressure/heavy GPU VM activity, but that's better
than no reclaim at all.

Signed-off-by: Akash Goel <[email protected]>
Co-developed-by: Boris Brezillon <[email protected]>
Signed-off-by: Boris Brezillon <[email protected]>
---
  drivers/gpu/drm/panthor/panthor_device.c |  11 +-
  drivers/gpu/drm/panthor/panthor_device.h |  73 ++++
  drivers/gpu/drm/panthor/panthor_gem.c    | 427 ++++++++++++++++++++++-
  drivers/gpu/drm/panthor/panthor_gem.h    |  67 ++++
  drivers/gpu/drm/panthor/panthor_mmu.c    | 338 +++++++++++++++++-
  drivers/gpu/drm/panthor/panthor_mmu.h    |   8 +
  6 files changed, 901 insertions(+), 23 deletions(-)
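
For context, the shrinker hookup itself isn't visible in the hunks quoted below. A minimal sketch of how its scan callback could drive the LRUs added by this patch is shown here, walking them from cheapest to most expensive to reclaim; the function name, the use of shrinker->private_data and the registration details are assumptions, not part of this patch:

static unsigned long
panthor_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
        struct panthor_device *ptdev = shrinker->private_data;
        unsigned long remaining = 0;
        unsigned long freed;

        /* Unused buffers first: no CPU or GPU mappings to tear down. */
        freed = drm_gem_lru_scan(&ptdev->reclaim.unused, sc->nr_to_scan,
                                 &remaining, panthor_gem_try_evict, NULL);

        /* Then buffers that are only CPU-mapped. */
        if (freed < sc->nr_to_scan)
                freed += drm_gem_lru_scan(&ptdev->reclaim.mmapped,
                                          sc->nr_to_scan - freed, &remaining,
                                          panthor_gem_try_evict, NULL);

        /* Finally, buffers mapped in a single GPU VM, walked VM by VM to
         * preserve the per-VM LRU ordering.
         */
        if (freed < sc->nr_to_scan)
                freed += panthor_mmu_reclaim_priv_bos(ptdev,
                                                      sc->nr_to_scan - freed,
                                                      &remaining,
                                                      panthor_gem_try_evict);

        return freed ?: SHRINK_STOP;
}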


diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index f35e52b9546a..bc348aa9634e 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -14,6 +14,7 @@
  #include <linux/spinlock.h>
#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
  #include <drm/drm_mm.h>
  #include <drm/gpu_scheduler.h>
  #include <drm/panthor_drm.h>
@@ -157,6 +158,78 @@ struct panthor_device {
        /** @devfreq: Device frequency scaling management data. */
        struct panthor_devfreq *devfreq;
+static bool is_gpu_mapped(struct panthor_gem_object *bo,
+                         enum panthor_gem_reclaim_state *state)
+{
+       struct drm_gpuvm *vm = NULL;
+       struct drm_gpuvm_bo *vm_bo;
+
+       drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
+               if (!vm) {
+                       *state = PANTHOR_GEM_GPU_MAPPED_PRIVATE;
+                       vm = vm_bo->vm;
+               } else if (vm != vm_bo->vm) {
+                       *state = PANTHOR_GEM_GPU_MAPPED_SHARED;
+                       break;
+               }
+       }
+
+       return !!vm;
+}
+
+static enum panthor_gem_reclaim_state
+panthor_gem_evaluate_reclaim_state_locked(struct panthor_gem_object *bo)
+{
+       enum panthor_gem_reclaim_state gpu_mapped_state;
+
+       dma_resv_assert_held(bo->base.resv);
+       lockdep_assert_held(&bo->base.gpuva.lock);
+
+       /* If pages have not been allocated, there's nothing to reclaim. */
+       if (!bo->backing.pages)
+               return PANTHOR_GEM_UNRECLAIMABLE;
+
+       /* If memory is pinned, we prevent reclaim. */
+       if (refcount_read(&bo->backing.pin_count))
+               return PANTHOR_GEM_UNRECLAIMABLE;
+
+       if (is_gpu_mapped(bo, &gpu_mapped_state))
+               return gpu_mapped_state;
+
+       if (refcount_read(&bo->cmap.mmap_count) && bo->backing.pages)
+               return PANTHOR_GEM_MMAPPED;
+
+       return PANTHOR_GEM_UNUSED;
+}
+
+void panthor_gem_update_reclaim_state_locked(struct panthor_gem_object *bo,
+                                            enum panthor_gem_reclaim_state *old_statep)
+{
+       struct panthor_device *ptdev = container_of(bo->base.dev, struct panthor_device, base);
+       enum panthor_gem_reclaim_state old_state = bo->reclaim_state;
+       enum panthor_gem_reclaim_state new_state;
+       bool was_gpu_mapped, is_gpu_mapped;
+
+       if (old_statep)
+               *old_statep = old_state;
+
+       new_state = panthor_gem_evaluate_reclaim_state_locked(bo);
+       if (new_state == old_state)
+               return;
+
+       was_gpu_mapped = old_state == PANTHOR_GEM_GPU_MAPPED_SHARED ||
+                        old_state == PANTHOR_GEM_GPU_MAPPED_PRIVATE;
+       is_gpu_mapped = new_state == PANTHOR_GEM_GPU_MAPPED_SHARED ||
+                       new_state == PANTHOR_GEM_GPU_MAPPED_PRIVATE;
+
+       if (is_gpu_mapped && !was_gpu_mapped)
+               ptdev->reclaim.gpu_mapped_count += bo->base.size >> PAGE_SHIFT;
+       else if (!is_gpu_mapped && was_gpu_mapped)
+               ptdev->reclaim.gpu_mapped_count -= bo->base.size >> PAGE_SHIFT;
+
+       switch (new_state) {
+       case PANTHOR_GEM_UNUSED:
+               drm_gem_lru_move_tail(&ptdev->reclaim.unused, &bo->base);
+               break;
+       case PANTHOR_GEM_MMAPPED:
+               drm_gem_lru_move_tail(&ptdev->reclaim.mmapped, &bo->base);
+               break;
+       case PANTHOR_GEM_GPU_MAPPED_PRIVATE:
+               panthor_vm_update_bo_reclaim_lru_locked(bo);
+               break;
+       case PANTHOR_GEM_GPU_MAPPED_SHARED:
+               drm_gem_lru_move_tail(&ptdev->reclaim.gpu_mapped_shared, &bo->base);
+               break;
+       case PANTHOR_GEM_UNRECLAIMABLE:
+               drm_gem_lru_remove(&bo->base);
+               break;
+       default:
+               break;
+       }
+
+       bo->reclaim_state = new_state;
+}
+
  static void
  panthor_gem_backing_cleanup(struct panthor_gem_object *bo)
  {
@@ -153,8 +249,12 @@ static int panthor_gem_backing_pin_locked(struct panthor_gem_object *bo)
                return 0;
ret = panthor_gem_backing_get_pages_locked(bo);
-       if (!ret)
+       if (!ret) {
                refcount_set(&bo->backing.pin_count, 1);
+               mutex_lock(&bo->base.gpuva.lock);
+               panthor_gem_update_reclaim_state_locked(bo, NULL);
+               mutex_unlock(&bo->base.gpuva.lock);
+       }
return ret;
  }
@@ -167,8 +267,12 @@ static void panthor_gem_backing_unpin_locked(struct panthor_gem_object *bo)
        /* We don't release anything when pin_count drops to zero.
         * Pages stay there until an explicit cleanup is requested.
         */
-       if (!refcount_dec_not_one(&bo->backing.pin_count))
+       if (!refcount_dec_not_one(&bo->backing.pin_count)) {
                refcount_set(&bo->backing.pin_count, 0);
+               mutex_lock(&bo->base.gpuva.lock);
+               panthor_gem_update_reclaim_state_locked(bo, NULL);
+               mutex_unlock(&bo->base.gpuva.lock);
+       }
  }
static void
@@ -531,6 +635,49 @@ void panthor_gem_unpin(struct panthor_gem_object *bo)
        dma_resv_unlock(bo->base.resv);
  }
+
+static void panthor_gem_evict_locked(struct panthor_gem_object *bo)
+{
+       dma_resv_assert_held(bo->base.resv);
+       lockdep_assert_held(&bo->base.gpuva.lock);
+
+       if (drm_WARN_ON_ONCE(bo->base.dev, drm_gem_is_imported(&bo->base)))
+               return;
+
+       if (drm_WARN_ON_ONCE(bo->base.dev, refcount_read(&bo->backing.pin_count)))
+               return;
+
+       if (drm_WARN_ON_ONCE(bo->base.dev, !bo->backing.pages))
+               return;
+
+       panthor_gem_dev_map_cleanup(bo);
+       panthor_gem_backing_cleanup(bo);
+       panthor_gem_update_reclaim_state_locked(bo, NULL);
+}
+

+
+static bool should_wait(enum panthor_gem_reclaim_state reclaim_state)
+{
+       return reclaim_state == PANTHOR_GEM_GPU_MAPPED_PRIVATE ||
+              reclaim_state == PANTHOR_GEM_GPU_MAPPED_SHARED;
+}
+
+bool panthor_gem_try_evict(struct drm_gem_object *obj,
+                          struct ww_acquire_ctx *ticket)
+{
+       /*
+        * Track last locked entry for unwinding locks in error and
+        * success paths
+        */
+       struct panthor_gem_object *bo = to_panthor_bo(obj);
+       struct drm_gpuvm_bo *vm_bo, *last_locked = NULL;
+       enum panthor_gem_reclaim_state old_state;
+       int ret = 0;
+
+       /* To avoid potential lock ordering issue between bo_gpuva and
+        * mapping->i_mmap_rwsem, unmap the pages from CPU side before
+        * acquiring the bo_gpuva lock. As the bo_resv lock is held, CPU
+        * page fault handler won't be able to map in the pages whilst
+        * eviction is in progress.
+        */
+       drm_vma_node_unmap(&bo->base.vma_node, bo->base.dev->anon_inode->i_mapping);
+
+       /* We take this lock when walking the list to prevent
+        * insertion/deletion.
+        */
+       /* We can only trylock in that path, because
+        * - allocation might happen while some of these locks are held
+        * - lock ordering is different in other paths
+        *     vm_resv -> bo_resv -> bo_gpuva
+        *     vs
+        *     bo_resv -> bo_gpuva -> vm_resv
+        *
+        * If we fail to lock that's fine, we back off and will get
+        * back to it later.
+        */
+       if (!mutex_trylock(&bo->base.gpuva.lock))
+               return false;
+
+       drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+               struct dma_resv *resv = drm_gpuvm_resv(vm_bo->vm);
+
+               if (resv == obj->resv)
+                       continue;
+
+               if (!dma_resv_trylock(resv)) {
+                       ret = -EDEADLK;
+                       goto out_unlock;
+               }
+
+               last_locked = vm_bo;
+       }
+
+       /* Update the state before trying to evict the buffer, if the state was
+        * updated to something that's harder to reclaim (higher value in the
+        * enum), skip it (will be processed when the relevant LRU is).
+        */
+       panthor_gem_update_reclaim_state_locked(bo, &old_state);
+       if (old_state < bo->reclaim_state) {
+               ret = -EAGAIN;
+               goto out_unlock;
+       }
+
+       /* Wait was too long, skip. */
+       if (should_wait(bo->reclaim_state) &&
+           dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, false, 10) <= 0) {
+               ret = -ETIMEDOUT;
+               goto out_unlock;
+       }
+
+       /* Couldn't teardown the GPU mappings? Skip. */
+       ret = panthor_vm_evict_bo_mappings_locked(bo);
+       if (ret)
+               goto out_unlock;
+
+       /* If everything went fine, evict the object. */
+       panthor_gem_evict_locked(bo);
+
+out_unlock:
+       if (last_locked) {
+               drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+                       struct dma_resv *resv = drm_gpuvm_resv(vm_bo->vm);
+
+                       if (resv == obj->resv)
+                               continue;
+
+                       dma_resv_unlock(resv);
+
+                       if (last_locked == vm_bo)
+                               break;
+               }
+       }
+       mutex_unlock(&bo->base.gpuva.lock);
+
+       return ret == 0;
+}


diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 3290e0b5facb..ffd821b3be46 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -1,6 +1,7 @@
  // SPDX-License-Identifier: GPL-2.0 or MIT
  /* Copyright 2019 Linaro, Ltd, Rob Herring <[email protected]> */
  /* Copyright 2023 Collabora ltd. */
+/* Copyright 2025 ARM Limited. All rights reserved. */
#include <drm/drm_debugfs.h>
  #include <drm/drm_drv.h>
@@ -131,6 +132,9 @@ struct panthor_vma {
         * Only map related flags are accepted.
         */
        u32 flags;
+
+       /** @evicted: True if the VMA has been evicted. */
+       bool evicted;
  };
/**

+void panthor_vm_update_bo_reclaim_lru_locked(struct panthor_gem_object *bo)
+{
+       struct panthor_device *ptdev = container_of(bo->base.dev, struct panthor_device, base);
+       struct panthor_vm *vm = NULL;
+       struct drm_gpuvm_bo *vm_bo;
+
+       dma_resv_assert_held(bo->base.resv);
+       lockdep_assert_held(&bo->base.gpuva.lock);
+
+       drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
+               /* We're only supposed to have one vm_bo in the list if we get there. */
+               drm_WARN_ON(&ptdev->base, vm);
+               vm = container_of(vm_bo->vm, struct panthor_vm, base);
+
+               mutex_lock(&ptdev->reclaim.lock);
+               drm_gem_lru_move_tail_locked(&vm->reclaim.lru, &bo->base);
+               if (list_empty(&vm->reclaim.lru_node))
+                       list_move(&vm->reclaim.lru_node, &ptdev->reclaim.vms);
+               mutex_unlock(&ptdev->reclaim.lock);
+       }
+}
+
+int panthor_vm_evict_bo_mappings_locked(struct panthor_gem_object *bo)
+{
+       struct drm_gpuvm_bo *vm_bo;
+
+       drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
+               struct panthor_vm *vm = container_of(vm_bo->vm, struct panthor_vm, base);
+               struct drm_gpuva *va;
+
+               if (!mutex_trylock(&vm->op_lock))
+                       return -EDEADLK;
+
+               drm_gpuvm_bo_evict(vm_bo, true);
+               drm_gpuvm_bo_for_each_va(va, vm_bo) {
+                       struct panthor_vma *vma = container_of(va, struct panthor_vma, base);
+
+                       panthor_vm_lock_region(vm, va->va.addr, va->va.range);
+                       panthor_vm_unmap_pages(vm, va->va.addr, va->va.range);

On further testing, I ran into a kernel warning:

https://elixir.bootlin.com/linux/v6.18-rc5/source/drivers/iommu/io-pgtable-arm.c#L641

https://elixir.bootlin.com/linux/v6.18-rc5/source/drivers/gpu/drm/panthor/panthor_mmu.c#L930

Call trace:
 __arm_lpae_unmap+0x570/0x5c8 (P)
 __arm_lpae_unmap+0x144/0x5c8
 __arm_lpae_unmap+0x144/0x5c8
 arm_lpae_unmap_pages+0xe8/0x120
 panthor_vm_unmap_pages+0x1f0/0x3f8 [panthor]
 panthor_vm_evict_bo_mappings_locked+0xf4/0x1d8 [panthor]
 panthor_gem_try_evict+0x190/0x7c8 [panthor]
 drm_gem_lru_scan+0x388/0x698

The following sequence leads to the kernel warning:

- The BO is mapped on the GPU and is in the GPU_MAPPED_PRIVATE state.

- The shrinker callback gets invoked and the BO is evicted. As a result, the BO moves to the UNRECLAIMABLE state.

- Userspace accesses the evicted BO and panthor_gem_fault() gets the backing pages again. The BO moves back to the GPU_MAPPED_PRIVATE state (even though it is technically still evicted: both vm_bo->evicted and vma->evicted are true).

- The shrinker callback is invoked again and tries to evict the same BO.

- panthor_vm_evict_bo_mappings_locked() calls panthor_vm_unmap_pages() and the kernel warning is emitted because the PTEs are already invalid.


I made the following change locally to avoid the warning:

diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index ffd821b3be46..e0a1dda1675f 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -2344,6 +2344,8 @@ int panthor_vm_evict_bo_mappings_locked(struct panthor_gem_object *bo)
                drm_gpuvm_bo_for_each_va(va, vm_bo) {
                        struct panthor_vma *vma = container_of(va, struct panthor_vma, base);

+                       if (vma->evicted)
+                               continue;
                        panthor_vm_lock_region(vm, va->va.addr, va->va.range);
                        panthor_vm_unmap_pages(vm, va->va.addr, va->va.range);
                        panthor_vm_unlock_region(vm);



Do you think we can also update is_gpu_mapped() so that an evicted BO moves to the MMAPPED state, instead of the GPU_MAPPED_PRIVATE state, on CPU access?

I'm not sure if the following change makes sense:

diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
index 6e91c5022d0d..8a8411fed195 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.c
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -125,6 +125,8 @@ static bool is_gpu_mapped(struct panthor_gem_object *bo,
        struct drm_gpuvm_bo *vm_bo;

        drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
+               if (vm_bo->evicted)
+                       continue;
                if (!vm) {
                        *state = PANTHOR_GEM_GPU_MAPPED_PRIVATE;
                        vm = vm_bo->vm;
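
With that change applied, a fully evicted BO whose mappings are all flagged vm_bo->evicted would be classified as MMAPPED rather than GPU_MAPPED_PRIVATE on CPU access. For clarity, here is only a sketch of what the resulting helper would look like, combining the original code with the hunk above:

static bool is_gpu_mapped(struct panthor_gem_object *bo,
                          enum panthor_gem_reclaim_state *state)
{
        struct drm_gpuvm *vm = NULL;
        struct drm_gpuvm_bo *vm_bo;

        drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
                /* Evicted mappings don't count as GPU mappings, so a
                 * faulted-back BO lands on the mmapped LRU instead.
                 */
                if (vm_bo->evicted)
                        continue;

                if (!vm) {
                        *state = PANTHOR_GEM_GPU_MAPPED_PRIVATE;
                        vm = vm_bo->vm;
                } else if (vm != vm_bo->vm) {
                        *state = PANTHOR_GEM_GPU_MAPPED_SHARED;
                        break;
                }
        }

        return !!vm;
}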


Please let me know.


Best regards
Akash




+                       panthor_vm_unlock_region(vm);
+                       vma->evicted = true;
+               }
+
+               mutex_unlock(&vm->op_lock);
+       }
+
+       return 0;
+}
+

  static const struct drm_gpuvm_ops panthor_gpuvm_ops = {
        .vm_free = panthor_vm_free,
        .vm_bo_free = panthor_vm_bo_free,
        .sm_step_map = panthor_gpuva_sm_step_map,
        .sm_step_remap = panthor_gpuva_sm_step_remap,
        .sm_step_unmap = panthor_gpuva_sm_step_unmap,
+       .vm_bo_validate = panthor_vm_bo_validate,
  };
/**
@@ -2473,6 +2702,8 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
        vm->kernel_auto_va.start = auto_kernel_va_start;
        vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size - 1;
+       drm_gem_lru_init(&vm->reclaim.lru, &ptdev->reclaim.lock);
+       INIT_LIST_HEAD(&vm->reclaim.lru_node);
        INIT_LIST_HEAD(&vm->node);
        INIT_LIST_HEAD(&vm->as.lru_node);
        vm->as.id = -1;
@@ -2820,7 +3051,78 @@ int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm
        if (ret)
                return ret;
-       return drm_gpuvm_prepare_objects(&vm->base, exec, slot_count);
+       ret = drm_gpuvm_prepare_objects(&vm->base, exec, slot_count);
+       if (ret)
+               return ret;
+
+       return drm_gpuvm_validate(&vm->base, exec);
+}
+
+unsigned long
+panthor_mmu_reclaim_priv_bos(struct panthor_device *ptdev,
+                            unsigned int nr_to_scan, unsigned long *remaining,
+                            bool (*shrink)(struct drm_gem_object *,
+                                           struct ww_acquire_ctx *))
+{
+       unsigned long freed = 0;
+       LIST_HEAD(remaining_vms);
+       LIST_HEAD(vms);
+
+       mutex_lock(&ptdev->reclaim.lock);
+       list_splice_init(&ptdev->reclaim.vms, &vms);
+
+       while (freed < nr_to_scan) {
+               struct panthor_vm *vm;
+
+               vm = list_first_entry_or_null(&vms, typeof(*vm),
+                                             reclaim.lru_node);
+               if (!vm)
+                       break;
+
+               if (!kref_get_unless_zero(&vm->base.kref)) {
+                       list_del_init(&vm->reclaim.lru_node);
+                       continue;
+               }
+
+               mutex_unlock(&ptdev->reclaim.lock);
+
+               freed += drm_gem_lru_scan(&vm->reclaim.lru, nr_to_scan - freed,
+                                         remaining, shrink, NULL);
+
+               mutex_lock(&ptdev->reclaim.lock);
+
+               /* If the VM is still in the temporary list, remove it so we
+                * can proceed with the next VM.
+                */
+               if (vm->reclaim.lru_node.prev == &vms) {
+                       list_del_init(&vm->reclaim.lru_node);
+
+                       /* Keep the VM around if there are still things to
+                        * reclaim, so we can preserve the LRU order when
+                        * re-inserting in ptdev->reclaim.vms at the end.
+                        */
+                       if (vm->reclaim.lru.count > 0)
+                               list_add_tail(&vm->reclaim.lru_node, &remaining_vms);
+               }
+
+               mutex_unlock(&ptdev->reclaim.lock);
+
+               panthor_vm_put(vm);
+
+               mutex_lock(&ptdev->reclaim.lock);
+       }
+
+       /* Re-insert VMs with remaining data to reclaim at the beginning of
+        * the LRU. Note that any activeness change on the VM that happened
+        * while we were reclaiming would have moved the VM out of our
+        * temporary [remaining_]vms list, meaning anything we re-insert here
+        * preserves the LRU order.
+        */
+       list_splice_tail(&vms, &remaining_vms);
+       list_splice(&remaining_vms, &ptdev->reclaim.vms);
+       mutex_unlock(&ptdev->reclaim.lock);
+
+       return freed;
  }
/**
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h
index 0e268fdfdb2f..3522fbbce369 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.h
+++ b/drivers/gpu/drm/panthor/panthor_mmu.h
@@ -1,6 +1,7 @@
  /* SPDX-License-Identifier: GPL-2.0 or MIT */
  /* Copyright 2019 Linaro, Ltd, Rob Herring <[email protected]> */
  /* Copyright 2023 Collabora ltd. */
+/* Copyright 2025 ARM Limited. All rights reserved. */
#ifndef __PANTHOR_MMU_H__
  #define __PANTHOR_MMU_H__
@@ -46,6 +47,13 @@ struct panthor_vm *panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
                                     u64 kernel_auto_va_start,
                                     u64 kernel_auto_va_size);
+void panthor_vm_update_bo_reclaim_lru_locked(struct panthor_gem_object *bo);
+int panthor_vm_evict_bo_mappings_locked(struct panthor_gem_object *bo);
+unsigned long
+panthor_mmu_reclaim_priv_bos(struct panthor_device *ptdev,
+                            unsigned int nr_to_scan, unsigned long *remaining,
+                            bool (*shrink)(struct drm_gem_object *,
+                                           struct ww_acquire_ctx *));
  int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec,
                                        struct panthor_vm *vm,
                                        u32 slot_count);
