Hi Boris,
On 1/9/26 13:08, Boris Brezillon wrote:
From: Akash Goel <[email protected]>
This implementation is loosely based on the MSM shrinker, and it relies
on the drm_gpuvm eviction/validation infrastructure.
Right now we only support swapout/eviction, but we could add an extra
flag to specify when buffer content doesn't need to be preserved to
avoid the swapout/swapin dance.
Locking is a bit of a nightmare, but using _trylock() all the way in
the reclaim path seems to make lockdep happy. And yes, we might be
missing opportunities to reclaim when the system is under heavy GPU
load/heavy memory pressure/heavy GPU VM activity, but that's better
than no reclaim at all.
Signed-off-by: Akash Goel <[email protected]>
Co-developed-by: Boris Brezillon <[email protected]>
Signed-off-by: Boris Brezillon <[email protected]>
---
drivers/gpu/drm/panthor/panthor_device.c | 11 +-
drivers/gpu/drm/panthor/panthor_device.h | 73 ++++
drivers/gpu/drm/panthor/panthor_gem.c | 427 ++++++++++++++++++++++-
drivers/gpu/drm/panthor/panthor_gem.h | 67 ++++
drivers/gpu/drm/panthor/panthor_mmu.c | 338 +++++++++++++++++-
drivers/gpu/drm/panthor/panthor_mmu.h | 8 +
6 files changed, 901 insertions(+), 23 deletions(-)
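The shrinker registration and its count/scan callbacks aren't quoted in this
mail, so for anyone following along, here is a rough sketch of how I read the
hookup, built on the LRUs and helpers this patch introduces
(panthor_gem_try_evict(), panthor_mmu_reclaim_priv_bos(), the reclaim.* LRUs).
The reclaim.shrinker field, the function names and the ordering of the LRU
passes are my guesses, not the actual code:

/* Sketch only; assumes <linux/shrinker.h> and the usual panthor headers. */
static unsigned long
panthor_reclaim_count(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct panthor_device *ptdev = shrinker->private_data;

	/* drm_gem_lru counts are in pages; gpu_mapped_count covers both the
	 * per-VM (private) LRUs and the shared LRU, so don't add
	 * gpu_mapped_shared.count on top of it.
	 */
	return ptdev->reclaim.unused.count +
	       ptdev->reclaim.mmapped.count +
	       ptdev->reclaim.gpu_mapped_count;
}

static unsigned long
panthor_reclaim_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct panthor_device *ptdev = shrinker->private_data;
	unsigned long remaining = 0, freed = 0;

	/* Cheapest victims first: unused, then CPU-mapped, then GPU-mapped. */
	freed += drm_gem_lru_scan(&ptdev->reclaim.unused, sc->nr_to_scan,
				  &remaining, panthor_gem_try_evict, NULL);
	if (freed < sc->nr_to_scan)
		freed += drm_gem_lru_scan(&ptdev->reclaim.mmapped,
					  sc->nr_to_scan - freed, &remaining,
					  panthor_gem_try_evict, NULL);
	if (freed < sc->nr_to_scan)
		freed += panthor_mmu_reclaim_priv_bos(ptdev,
						      sc->nr_to_scan - freed,
						      &remaining,
						      panthor_gem_try_evict);
	if (freed < sc->nr_to_scan)
		freed += drm_gem_lru_scan(&ptdev->reclaim.gpu_mapped_shared,
					  sc->nr_to_scan - freed, &remaining,
					  panthor_gem_try_evict, NULL);

	return freed ?: SHRINK_STOP;
}

/* Registration, e.g. called from device init (names are placeholders): */
static int panthor_reclaim_init(struct panthor_device *ptdev)
{
	ptdev->reclaim.shrinker = shrinker_alloc(0, "drm-panthor");
	if (!ptdev->reclaim.shrinker)
		return -ENOMEM;

	ptdev->reclaim.shrinker->count_objects = panthor_reclaim_count;
	ptdev->reclaim.shrinker->scan_objects = panthor_reclaim_scan;
	ptdev->reclaim.shrinker->private_data = ptdev;
	shrinker_register(ptdev->reclaim.shrinker);
	return 0;
}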
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index f35e52b9546a..bc348aa9634e 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
#include <drm/drm_mm.h>
#include <drm/gpu_scheduler.h>
#include <drm/panthor_drm.h>
@@ -157,6 +158,78 @@ struct panthor_device {
/** @devfreq: Device frequency scaling management data. */
struct panthor_devfreq *devfreq;
+static bool is_gpu_mapped(struct panthor_gem_object *bo,
+ enum panthor_gem_reclaim_state *state)
+{
+ struct drm_gpuvm *vm = NULL;
+ struct drm_gpuvm_bo *vm_bo;
+
+ drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
+ if (!vm) {
+ *state = PANTHOR_GEM_GPU_MAPPED_PRIVATE;
+ vm = vm_bo->vm;
+ } else if (vm != vm_bo->vm) {
+ *state = PANTHOR_GEM_GPU_MAPPED_SHARED;
+ break;
+ }
+ }
+
+ return !!vm;
+}
+
+static enum panthor_gem_reclaim_state
+panthor_gem_evaluate_reclaim_state_locked(struct panthor_gem_object *bo)
+{
+ enum panthor_gem_reclaim_state gpu_mapped_state;
+
+ dma_resv_assert_held(bo->base.resv);
+ lockdep_assert_held(&bo->base.gpuva.lock);
+
+ /* If pages have not been allocated, there's nothing to reclaim. */
+ if (!bo->backing.pages)
+ return PANTHOR_GEM_UNRECLAIMABLE;
+
+ /* If memory is pinned, we prevent reclaim. */
+ if (refcount_read(&bo->backing.pin_count))
+ return PANTHOR_GEM_UNRECLAIMABLE;
+
+ if (is_gpu_mapped(bo, &gpu_mapped_state))
+ return gpu_mapped_state;
+
+ if (refcount_read(&bo->cmap.mmap_count) && bo->backing.pages)
+ return PANTHOR_GEM_MMAPPED;
+
+ return PANTHOR_GEM_UNUSED;
+}
+
+void panthor_gem_update_reclaim_state_locked(struct panthor_gem_object *bo,
+ enum panthor_gem_reclaim_state *old_statep)
+{
+ struct panthor_device *ptdev = container_of(bo->base.dev, struct panthor_device, base);
+ enum panthor_gem_reclaim_state old_state = bo->reclaim_state;
+ enum panthor_gem_reclaim_state new_state;
+ bool was_gpu_mapped, is_gpu_mapped;
+
+ if (old_statep)
+ *old_statep = old_state;
+
+ new_state = panthor_gem_evaluate_reclaim_state_locked(bo);
+ if (new_state == old_state)
+ return;
+
+ was_gpu_mapped = old_state == PANTHOR_GEM_GPU_MAPPED_SHARED ||
+ old_state == PANTHOR_GEM_GPU_MAPPED_PRIVATE;
+ is_gpu_mapped = new_state == PANTHOR_GEM_GPU_MAPPED_SHARED ||
+ new_state == PANTHOR_GEM_GPU_MAPPED_PRIVATE;
+
+ if (is_gpu_mapped && !was_gpu_mapped)
+ ptdev->reclaim.gpu_mapped_count += bo->base.size >> PAGE_SHIFT;
+ else if (!is_gpu_mapped && was_gpu_mapped)
+ ptdev->reclaim.gpu_mapped_count -= bo->base.size >> PAGE_SHIFT;
+
+ switch (new_state) {
+ case PANTHOR_GEM_UNUSED:
+ drm_gem_lru_move_tail(&ptdev->reclaim.unused, &bo->base);
+ break;
+ case PANTHOR_GEM_MMAPPED:
+ drm_gem_lru_move_tail(&ptdev->reclaim.mmapped, &bo->base);
+ break;
+ case PANTHOR_GEM_GPU_MAPPED_PRIVATE:
+ panthor_vm_update_bo_reclaim_lru_locked(bo);
+ break;
+ case PANTHOR_GEM_GPU_MAPPED_SHARED:
+ drm_gem_lru_move_tail(&ptdev->reclaim.gpu_mapped_shared, &bo->base);
+ break;
+ case PANTHOR_GEM_UNRECLAIMABLE:
+ drm_gem_lru_remove(&bo->base);
+ break;
+ default:
+ break;
+ }
+
+ bo->reclaim_state = new_state;
+}
+
static void
panthor_gem_backing_cleanup(struct panthor_gem_object *bo)
{
@@ -153,8 +249,12 @@ static int panthor_gem_backing_pin_locked(struct panthor_gem_object *bo)
return 0;
ret = panthor_gem_backing_get_pages_locked(bo);
- if (!ret)
+ if (!ret) {
refcount_set(&bo->backing.pin_count, 1);
+ mutex_lock(&bo->base.gpuva.lock);
+ panthor_gem_update_reclaim_state_locked(bo, NULL);
+ mutex_unlock(&bo->base.gpuva.lock);
+ }
return ret;
}
@@ -167,8 +267,12 @@ static void panthor_gem_backing_unpin_locked(struct panthor_gem_object *bo)
/* We don't release anything when pin_count drops to zero.
* Pages stay there until an explicit cleanup is requested.
*/
- if (!refcount_dec_not_one(&bo->backing.pin_count))
+ if (!refcount_dec_not_one(&bo->backing.pin_count)) {
refcount_set(&bo->backing.pin_count, 0);
+ mutex_lock(&bo->base.gpuva.lock);
+ panthor_gem_update_reclaim_state_locked(bo, NULL);
+ mutex_unlock(&bo->base.gpuva.lock);
+ }
}
static void
@@ -531,6 +635,49 @@ void panthor_gem_unpin(struct panthor_gem_object *bo)
dma_resv_unlock(bo->base.resv);
}
+
+static void panthor_gem_evict_locked(struct panthor_gem_object *bo)
+{
+ dma_resv_assert_held(bo->base.resv);
+ lockdep_assert_held(&bo->base.gpuva.lock);
+
+ if (drm_WARN_ON_ONCE(bo->base.dev, drm_gem_is_imported(&bo->base)))
+ return;
+
+ if (drm_WARN_ON_ONCE(bo->base.dev, refcount_read(&bo->backing.pin_count)))
+ return;
+
+ if (drm_WARN_ON_ONCE(bo->base.dev, !bo->backing.pages))
+ return;
+
+ panthor_gem_dev_map_cleanup(bo);
+ panthor_gem_backing_cleanup(bo);
+ panthor_gem_update_reclaim_state_locked(bo, NULL);
+}
+
+
+static bool should_wait(enum panthor_gem_reclaim_state reclaim_state)
+{
+ return reclaim_state == PANTHOR_GEM_GPU_MAPPED_PRIVATE ||
+ reclaim_state == PANTHOR_GEM_GPU_MAPPED_SHARED;
+}
+
+bool panthor_gem_try_evict(struct drm_gem_object *obj,
+ struct ww_acquire_ctx *ticket)
+{
+ /*
+ * Track last locked entry for unwinding locks in error and
+ * success paths
+ */
+ struct panthor_gem_object *bo = to_panthor_bo(obj);
+ struct drm_gpuvm_bo *vm_bo, *last_locked = NULL;
+ enum panthor_gem_reclaim_state old_state;
+ int ret = 0;
+
+ /* To avoid potential lock ordering issue between bo_gpuva and
+ * mapping->i_mmap_rwsem, unmap the pages from CPU side before
+ * acquiring the bo_gpuva lock. As the bo_resv lock is held, the CPU
+ * page fault handler won't be able to map in the pages whilst
+ * eviction is in progress.
+ */
+ drm_vma_node_unmap(&bo->base.vma_node, bo->base.dev->anon_inode->i_mapping);
+
+ /* We take this lock when walking the list to prevent
+ * insertion/deletion.
+ */
+ /* We can only trylock in that path, because
+ * - allocation might happen while some of these locks are held
+ * - lock ordering is different in other paths
+ * vm_resv -> bo_resv -> bo_gpuva
+ * vs
+ * bo_resv -> bo_gpuva -> vm_resv
+ *
+ * If we fail to lock that's fine, we back off and will get
+ * back to it later.
+ */
+ if (!mutex_trylock(&bo->base.gpuva.lock))
+ return false;
+
+ drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+ struct dma_resv *resv = drm_gpuvm_resv(vm_bo->vm);
+
+ if (resv == obj->resv)
+ continue;
+
+ if (!dma_resv_trylock(resv)) {
+ ret = -EDEADLK;
+ goto out_unlock;
+ }
+
+ last_locked = vm_bo;
+ }
+
+ /* Update the state before trying to evict the buffer, if the state was
+ * updated to something that's harder to reclaim (higher value in the
+ * enum), skip it (will be processed when the relevant LRU is).
+ */
+ panthor_gem_update_reclaim_state_locked(bo, &old_state);
+ if (old_state < bo->reclaim_state) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+
+ /* Wait was too long, skip. */
+ if (should_wait(bo->reclaim_state) &&
+ dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, false, 10) <= 0) {
+ ret = -ETIMEDOUT;
+ goto out_unlock;
+ }
+
+ /* Couldn't teardown the GPU mappings? Skip. */
+ ret = panthor_vm_evict_bo_mappings_locked(bo);
+ if (ret)
+ goto out_unlock;
+
+ /* If everything went fine, evict the object. */
+ panthor_gem_evict_locked(bo);
+
+out_unlock:
+ if (last_locked) {
+ drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+ struct dma_resv *resv = drm_gpuvm_resv(vm_bo->vm);
+
+ if (resv == obj->resv)
+ continue;
+
+ dma_resv_unlock(resv);
+
+ if (last_locked == vm_bo)
+ break;
+ }
+ }
+ mutex_unlock(&bo->base.gpuva.lock);
+
+ return ret == 0;
+}
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 3290e0b5facb..ffd821b3be46 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2019 Linaro, Ltd, Rob Herring <[email protected]> */
/* Copyright 2023 Collabora ltd. */
+/* Copyright 2025 ARM Limited. All rights reserved. */
#include <drm/drm_debugfs.h>
#include <drm/drm_drv.h>
@@ -131,6 +132,9 @@ struct panthor_vma {
* Only map related flags are accepted.
*/
u32 flags;
+
+ /** @evicted: True if the VMA has been evicted. */
+ bool evicted;
};
/**
+void panthor_vm_update_bo_reclaim_lru_locked(struct panthor_gem_object *bo)
+{
+ struct panthor_device *ptdev = container_of(bo->base.dev, struct panthor_device, base);
+ struct panthor_vm *vm = NULL;
+ struct drm_gpuvm_bo *vm_bo;
+
+ dma_resv_assert_held(bo->base.resv);
+ lockdep_assert_held(&bo->base.gpuva.lock);
+
+ drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
+ /* We're only supposed to have one vm_bo in the list if we get there. */
+ drm_WARN_ON(&ptdev->base, vm);
+ vm = container_of(vm_bo->vm, struct panthor_vm, base);
+
+ mutex_lock(&ptdev->reclaim.lock);
+ drm_gem_lru_move_tail_locked(&vm->reclaim.lru, &bo->base);
+ if (list_empty(&vm->reclaim.lru_node))
+ list_move(&vm->reclaim.lru_node, &ptdev->reclaim.vms);
+ mutex_unlock(&ptdev->reclaim.lock);
+ }
+}
+
+int panthor_vm_evict_bo_mappings_locked(struct panthor_gem_object *bo)
+{
+ struct drm_gpuvm_bo *vm_bo;
+
+ drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
+ struct panthor_vm *vm = container_of(vm_bo->vm, struct panthor_vm, base);
+ struct drm_gpuva *va;
+
+ if (!mutex_trylock(&vm->op_lock))
+ return -EDEADLK;
+
+ drm_gpuvm_bo_evict(vm_bo, true);
+ drm_gpuvm_bo_for_each_va(va, vm_bo) {
+ struct panthor_vma *vma = container_of(va, struct panthor_vma, base);
+
+ panthor_vm_lock_region(vm, va->va.addr, va->va.range);
+ panthor_vm_unmap_pages(vm, va->va.addr, va->va.range);
On further testing, I ran into a kernel warning:
https://elixir.bootlin.com/linux/v6.18-rc5/source/drivers/iommu/io-pgtable-arm.c#L641
https://elixir.bootlin.com/linux/v6.18-rc5/source/drivers/gpu/drm/panthor/panthor_mmu.c#L930
Call trace:
__arm_lpae_unmap+0x570/0x5c8 (P)
__arm_lpae_unmap+0x144/0x5c8
__arm_lpae_unmap+0x144/0x5c8
arm_lpae_unmap_pages+0xe8/0x120
panthor_vm_unmap_pages+0x1f0/0x3f8 [panthor]
panthor_vm_evict_bo_mappings_locked+0xf4/0x1d8 [panthor]
panthor_gem_try_evict+0x190/0x7c8 [panthor]
drm_gem_lru_scan+0x388/0x698
The following sequence leads to the kernel warning:
- The BO is mapped on the GPU and is in the GPU_MAPPED_PRIVATE state.
- The shrinker callback is invoked and the BO is evicted. As a result, the BO
moves to the UNRECLAIMABLE state.
- Userspace accesses the evicted BO and panthor_gem_fault() gets the backing
pages again (roughly sketched after this list). The BO moves back to the
GPU_MAPPED_PRIVATE state, even though it is technically still evicted (both
vm_bo->evicted and vma->evicted are true).
- The shrinker callback is invoked again and tries to evict the same BO.
- panthor_vm_evict_bo_mappings_locked() calls panthor_vm_unmap_pages() and the
kernel warning is emitted because the PTEs are already invalid.
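For reference, this is roughly the fault path I have in mind when I say the BO
flips back to GPU_MAPPED_PRIVATE. The real panthor_gem_fault() isn't quoted in
this mail, so treat the shape and names below as my approximation, not the
actual implementation:

static vm_fault_t panthor_gem_fault(struct vm_fault *vmf)
{
	struct drm_gem_object *obj = vmf->vma->vm_private_data;
	struct panthor_gem_object *bo = to_panthor_bo(obj);
	vm_fault_t ret = VM_FAULT_NOPAGE;

	/* Locking simplified for the sketch. */
	dma_resv_lock(obj->resv, NULL);

	/* Re-populate the backing pages that the eviction released. */
	if (panthor_gem_backing_get_pages_locked(bo)) {
		ret = VM_FAULT_OOM;
		goto out_unlock;
	}

	/* ... insert the faulted page into the CPU page tables ... */

	/*
	 * The re-evaluation still walks the (stale) vm_bo list, so
	 * is_gpu_mapped() reports GPU_MAPPED_PRIVATE even though both
	 * vm_bo->evicted and vma->evicted are true.
	 */
	mutex_lock(&obj->gpuva.lock);
	panthor_gem_update_reclaim_state_locked(bo, NULL);
	mutex_unlock(&obj->gpuva.lock);

out_unlock:
	dma_resv_unlock(obj->resv);
	return ret;
}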
I made the following change locally to avoid the warning:
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index ffd821b3be46..e0a1dda1675f 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -2344,6 +2344,8 @@ int panthor_vm_evict_bo_mappings_locked(struct panthor_gem_object *bo)
drm_gpuvm_bo_for_each_va(va, vm_bo) {
struct panthor_vma *vma = container_of(va, struct panthor_vma, base);
+ if (vma->evicted)
+ continue;
panthor_vm_lock_region(vm, va->va.addr, va->va.range);
panthor_vm_unmap_pages(vm, va->va.addr, va->va.range);
panthor_vm_unlock_region(vm);
Do you think we could also update is_gpu_mapped() so that an evicted BO moves
to the MMAPPED state, instead of the GPU_MAPPED_PRIVATE state, on CPU access?
I'm not sure whether the following change makes sense:
diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
index 6e91c5022d0d..8a8411fed195 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.c
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -125,6 +125,8 @@ static bool is_gpu_mapped(struct panthor_gem_object *bo,
struct drm_gpuvm_bo *vm_bo;
drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
+ if (vm_bo->evicted)
+ continue;
if (!vm) {
*state = PANTHOR_GEM_GPU_MAPPED_PRIVATE;
vm = vm_bo->vm;
Please let me know.
Best regards
Akash
+ panthor_vm_unlock_region(vm);
+ vma->evicted = true;
+ }
+
+ mutex_unlock(&vm->op_lock);
+ }
+
+ return 0;
+}
+
static const struct drm_gpuvm_ops panthor_gpuvm_ops = {
.vm_free = panthor_vm_free,
.vm_bo_free = panthor_vm_bo_free,
.sm_step_map = panthor_gpuva_sm_step_map,
.sm_step_remap = panthor_gpuva_sm_step_remap,
.sm_step_unmap = panthor_gpuva_sm_step_unmap,
+ .vm_bo_validate = panthor_vm_bo_validate,
};
/**
@@ -2473,6 +2702,8 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
vm->kernel_auto_va.start = auto_kernel_va_start;
vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size - 1;
+ drm_gem_lru_init(&vm->reclaim.lru, &ptdev->reclaim.lock);
+ INIT_LIST_HEAD(&vm->reclaim.lru_node);
INIT_LIST_HEAD(&vm->node);
INIT_LIST_HEAD(&vm->as.lru_node);
vm->as.id = -1;
@@ -2820,7 +3051,78 @@ int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm
if (ret)
return ret;
- return drm_gpuvm_prepare_objects(&vm->base, exec, slot_count);
+ ret = drm_gpuvm_prepare_objects(&vm->base, exec, slot_count);
+ if (ret)
+ return ret;
+
+ return drm_gpuvm_validate(&vm->base, exec);
+}
+
+unsigned long
+panthor_mmu_reclaim_priv_bos(struct panthor_device *ptdev,
+ unsigned int nr_to_scan, unsigned long *remaining,
+ bool (*shrink)(struct drm_gem_object *,
+ struct ww_acquire_ctx *))
+{
+ unsigned long freed = 0;
+ LIST_HEAD(remaining_vms);
+ LIST_HEAD(vms);
+
+ mutex_lock(&ptdev->reclaim.lock);
+ list_splice_init(&ptdev->reclaim.vms, &vms);
+
+ while (freed < nr_to_scan) {
+ struct panthor_vm *vm;
+
+ vm = list_first_entry_or_null(&vms, typeof(*vm),
+ reclaim.lru_node);
+ if (!vm)
+ break;
+
+ if (!kref_get_unless_zero(&vm->base.kref)) {
+ list_del_init(&vm->reclaim.lru_node);
+ continue;
+ }
+
+ mutex_unlock(&ptdev->reclaim.lock);
+
+ freed += drm_gem_lru_scan(&vm->reclaim.lru, nr_to_scan - freed,
+ remaining, shrink, NULL);
+
+ mutex_lock(&ptdev->reclaim.lock);
+
+ /* If the VM is still in the temporary list, remove it so we
+ * can proceed with the next VM.
+ */
+ if (vm->reclaim.lru_node.prev == &vms) {
+ list_del_init(&vm->reclaim.lru_node);
+
+ /* Keep the VM around if there are still things to
+ * reclaim, so we can preserve the LRU order when
+ * re-inserting in ptdev->reclaim.vms at the end.
+ */
+ if (vm->reclaim.lru.count > 0)
+ list_add_tail(&vm->reclaim.lru_node, &remaining_vms);
+ }
+
+ mutex_unlock(&ptdev->reclaim.lock);
+
+ panthor_vm_put(vm);
+
+ mutex_lock(&ptdev->reclaim.lock);
+ }
+
+ /* Re-insert VMs with remaining data to reclaim at the beginning of
+ * the LRU. Note that any activeness change on the VM that happened
+ * while we were reclaiming would have moved the VM out of our
+ * temporary [remaining_]vms list, meaning anything we re-insert here
+ * preserves the LRU order.
+ */
+ list_splice_tail(&vms, &remaining_vms);
+ list_splice(&remaining_vms, &ptdev->reclaim.vms);
+ mutex_unlock(&ptdev->reclaim.lock);
+
+ return freed;
}
/**
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h
index 0e268fdfdb2f..3522fbbce369 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.h
+++ b/drivers/gpu/drm/panthor/panthor_mmu.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 or MIT */
/* Copyright 2019 Linaro, Ltd, Rob Herring <[email protected]> */
/* Copyright 2023 Collabora ltd. */
+/* Copyright 2025 ARM Limited. All rights reserved. */
#ifndef __PANTHOR_MMU_H__
#define __PANTHOR_MMU_H__
@@ -46,6 +47,13 @@ struct panthor_vm *panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
u64 kernel_auto_va_start,
u64 kernel_auto_va_size);
+void panthor_vm_update_bo_reclaim_lru_locked(struct panthor_gem_object *bo);
+int panthor_vm_evict_bo_mappings_locked(struct panthor_gem_object *bo);
+unsigned long
+panthor_mmu_reclaim_priv_bos(struct panthor_device *ptdev,
+ unsigned int nr_to_scan, unsigned long *remaining,
+ bool (*shrink)(struct drm_gem_object *,
+ struct ww_acquire_ctx *));
int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec,
struct panthor_vm *vm,
u32 slot_count);