A signaled scheduler fence can outlive its scheduler, since fences are
independently reference counted. Therefore, we can't reference the
scheduler in the get_timeline_name() implementation.

Fixes oopses on `cat /sys/kernel/debug/dma_buf/bufinfo` when shared
dma-bufs reference fences from GPU schedulers that no longer exist.

Signed-off-by: Asahi Lina <l...@asahilina.net>
---
 drivers/gpu/drm/scheduler/sched_entity.c | 7 ++++++-
 drivers/gpu/drm/scheduler/sched_fence.c  | 4 +++-
 include/drm/gpu_scheduler.h              | 5 +++++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index b2bbc8a68b30..17f35b0b005a 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -389,7 +389,12 @@ static bool drm_sched_entity_add_dependency_cb(struct 
drm_sched_entity *entity)
 
                /*
                 * Fence is from the same scheduler, only need to wait for
-                * it to be scheduled
+                * it to be scheduled.
+                *
+                * Note: s_fence->sched could have been freed and reallocated
+                * as another scheduler. This false positive case is okay: if
+                * the old scheduler was freed, all of its jobs must have
+                * signaled their completion fences.
                 */
                fence = dma_fence_get(&s_fence->scheduled);
                dma_fence_put(entity->dependency);
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c 
b/drivers/gpu/drm/scheduler/sched_fence.c
index ef120475e7c6..06a0eebcca10 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -68,7 +68,7 @@ static const char *drm_sched_fence_get_driver_name(struct 
dma_fence *fence)
 static const char *drm_sched_fence_get_timeline_name(struct dma_fence *f)
 {
        struct drm_sched_fence *fence = to_drm_sched_fence(f);
-       return (const char *)fence->sched->name;
+       return (const char *)fence->sched_name;
 }
 
 static void drm_sched_fence_free_rcu(struct rcu_head *rcu)
@@ -216,6 +216,8 @@ void drm_sched_fence_init(struct drm_sched_fence *fence,
        unsigned seq;
 
        fence->sched = entity->rq->sched;
+       strlcpy(fence->sched_name, entity->rq->sched->name,
+               sizeof(fence->sched_name));
        seq = atomic_inc_return(&entity->fence_seq);
        dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled,
                       &fence->lock, entity->fence_context, seq);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index e95b4837e5a3..4fa9523bd47d 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -305,6 +305,11 @@ struct drm_sched_fence {
          * @lock: the lock used by the scheduled and the finished fences.
          */
        spinlock_t                      lock;
+        /**
+         * @sched_name: the name of the scheduler that owns this fence. We
+         * keep a copy here since fences can outlive their scheduler.
+         */
+       char sched_name[16];
         /**
          * @owner: job owner for debugging
          */

-- 
2.40.1

Reply via email to