On 2023-06-01 15:31, Philip Yang wrote:
Add pt_fence to amdgpu vm structure and implement helper functions. This
fence will be shared by all page table BOs of the same amdgpu vm.

Suggested-by: Felix Kuehling <felix.kuehl...@amd.com>
Signed-off-by: Philip Yang <philip.y...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 45 +++++++++++++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  4 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  1 +
  4 files changed, 52 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5af954abd5ba..09c116dfda31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1499,6 +1499,8 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
  int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
                               enum amd_powergating_state state);
+struct dma_fence *amdgpu_pt_fence_create(void);
+
  static inline bool amdgpu_device_has_timeouts_enabled(struct amdgpu_device 
*adev)
  {
        return amdgpu_gpu_recovery != 0 &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index c694b41f6461..d9bfb0af3a09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -57,6 +57,16 @@ struct amdgpu_fence {
        ktime_t                         start_timestamp;
  };
+/*
+ * Page table BO fence
+ * Fence used to ensure page table BOs are freed after the TLB is flushed, to
+ * avoid the H/W accessing corrupted data if a freed BO page is reused.
+ */
+struct amdgpu_pt_fence {
+       struct dma_fence base;
+       spinlock_t lock;
+};
+
  static struct kmem_cache *amdgpu_fence_slab;
int amdgpu_fence_slab_init(void)
@@ -79,6 +89,7 @@ void amdgpu_fence_slab_fini(void)
   */
  static const struct dma_fence_ops amdgpu_fence_ops;
  static const struct dma_fence_ops amdgpu_job_fence_ops;
+static const struct dma_fence_ops amdgpu_pt_fence_ops;
  static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
  {
        struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
@@ -852,6 +863,40 @@ static const struct dma_fence_ops amdgpu_job_fence_ops = {
        .release = amdgpu_job_fence_release,
  };
+static atomic_t pt_fence_seq = ATOMIC_INIT(0);
+
+struct dma_fence *amdgpu_pt_fence_create(void)
+{
+       struct amdgpu_pt_fence *fence;
+
+       fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+       if (!fence)
+               return NULL;
+
+       spin_lock_init(&fence->lock);
+       dma_fence_init(&fence->base, &amdgpu_pt_fence_ops, &fence->lock,
+                      dma_fence_context_alloc(1), atomic_inc_return(&pt_fence_seq));

Creating a new fence context per fence is probably wrong. I think we only need one PT fence context per GPU or per spatial partition, or maybe per VM.

Regards,
  Felix


+
+       dma_fence_get(&fence->base);
+       return &fence->base;
+}
+
+static const char *amdgpu_pt_fence_get_timeline_name(struct dma_fence *f)
+{
+       return "pt_fence_timeline";
+}
+
+static void amdgpu_pt_fence_release(struct dma_fence *f)
+{
+       kfree_rcu(f, rcu);
+}
+
+static const struct dma_fence_ops amdgpu_pt_fence_ops = {
+       .get_driver_name = amdgpu_fence_get_driver_name,
+       .get_timeline_name = amdgpu_pt_fence_get_timeline_name,
+       .release = amdgpu_pt_fence_release,
+};
+
  /*
   * Fence debugfs
   */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 37b9d8a8dbec..0219398e625c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2147,6 +2147,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
                vm->update_funcs = &amdgpu_vm_sdma_funcs;
vm->last_update = dma_fence_get_stub();
+       vm->pt_fence = amdgpu_pt_fence_create();
        vm->last_unlocked = dma_fence_get_stub();
        vm->last_tlb_flush = dma_fence_get_stub();
        vm->generation = 0;
@@ -2270,6 +2271,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm)
dma_fence_put(vm->last_update);
        vm->last_update = dma_fence_get_stub();
+       dma_fence_put(vm->pt_fence);
+       vm->pt_fence = amdgpu_pt_fence_create();
        vm->is_compute_context = true;
/* Free the shadow bo for compute VM */
@@ -2358,6 +2361,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
        }
dma_fence_put(vm->last_update);
+       dma_fence_put(vm->pt_fence);
        for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
                amdgpu_vmid_free_reserved(adev, vm, i);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d551fca1780e..9cc729358612 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -286,6 +286,7 @@ struct amdgpu_vm {
        /* contains the page directory */
        struct amdgpu_vm_bo_base     root;
        struct dma_fence        *last_update;
+       struct dma_fence        *pt_fence;
/* Scheduler entities for page table updates */
        struct drm_sched_entity immediate;

Reply via email to