On 2023-09-05 02:04, Christian König wrote:
Move the SDMA workaround necessary for Navi 1x into a higher layer.

Signed-off-by: Christian König <christian.koe...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  48 +++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |   5 +-
  drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c |   3 +
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 159 ++++++-----------------
  4 files changed, 97 insertions(+), 118 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index d78bd9732543..857051093900 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -575,6 +575,54 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
        return 0;
  }

+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+                             uint32_t vmhub, uint32_t flush_type)
+{
+       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+       struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+       struct dma_fence *fence;
+       struct amdgpu_job *job;
+       int r;
+
+       if (!hub->sdma_invalidation_workaround || vmid ||

The "|| vmid" part of the condition is new. AFAICT, the workaround was applied to all VMIDs before this patch. Is this change intentional?

Regards,
  Felix


+           !adev->mman.buffer_funcs_enabled ||
+           !adev->ib_pool_ready || amdgpu_in_reset(adev) ||
+           !ring->sched.ready) {
+               adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
+                                                  flush_type);
+               return;
+       }
+
+       /* The SDMA on Navi 1x has a bug which can theoretically result in memory
+        * corruption if an invalidation happens at the same time as a VA
+        * translation. Avoid this by doing the invalidation from the SDMA
+        * itself at least for GART.
+        */
+       mutex_lock(&adev->mman.gtt_window_lock);
+       r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
+                                    AMDGPU_FENCE_OWNER_UNDEFINED,
+                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+                                    &job);
+       if (r)
+               goto error_alloc;
+
+       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+       job->vm_needs_flush = true;
+       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
+       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+       fence = amdgpu_job_submit(job);
+       mutex_unlock(&adev->mman.gtt_window_lock);
+
+       dma_fence_wait(fence, false);
+       dma_fence_put(fence);
+
+       return;
+
+error_alloc:
+       mutex_unlock(&adev->mman.gtt_window_lock);
+       DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+}
+
  /**
   * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
   * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index fdc25cd559b6..9e7df2f69123 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -117,6 +117,8 @@ struct amdgpu_vmhub {
        uint32_t vm_contexts_disable;

+       bool sdma_invalidation_workaround;
+
        const struct amdgpu_vmhub_funcs *vmhub_funcs;
  };
@@ -335,7 +337,6 @@ struct amdgpu_gmc {
        u64 noretry_flags;
  };

-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
  #define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
        ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
        ((adev), (pasid), (type), (allhub), (inst)))
@@ -401,6 +402,8 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
  int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
  void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
  int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+                             uint32_t vmhub, uint32_t flush_type);

  extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
  extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index a041c6c970e1..8521c45e8f38 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev)
                GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
                GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

+       /* TODO: This is only needed on some Navi 1x revisions */
+       hub->sdma_invalidation_workaround = true;
+
        hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index fa87a85e1017..1f70c57bcd69 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -230,20 +230,49 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
   * by the amdgpu vm/hsa code.
   */
-static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
-                                  unsigned int vmhub, uint32_t flush_type)
+/**
+ * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: vmhub type
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+                                       uint32_t vmhub, uint32_t flush_type)
  {
        bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
        struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
        u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
-       u32 tmp;
        /* Use register 17 for GART */
        const unsigned int eng = 17;
-       unsigned int i;
        unsigned char hub_ip = 0;
+       u32 sem, req, ack;
+       unsigned int i;
+       u32 tmp;

-       hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
-                  GC_HWIP : MMHUB_HWIP;
+       sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+       req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+       ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+       /* flush hdp cache */
+       adev->hdp.funcs->flush_hdp(adev, NULL);
+
+       /* For SRIOV run time, driver shouldn't access the register through MMIO
+        * Directly use kiq to do the vm invalidation instead
+        */
+       if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
+           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+           down_read_trylock(&adev->reset_domain->sem)) {
+               amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
+                               1 << vmid);
+               up_read(&adev->reset_domain->sem);
+               return;
+       }
+
+       hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;

        spin_lock(&adev->gmc.invalidate_lock);
        /*
@@ -257,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
        if (use_semaphore) {
                for (i = 0; i < adev->usec_timeout; i++) {
                        /* a read return value of 1 means semaphore acuqire */
-                       tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
-                                        hub->eng_distance * eng, hub_ip);
-
+                       tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
                        if (tmp & 0x1)
                                break;
                        udelay(1);
@@ -269,9 +296,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
                        DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
        }

-       WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
-                         hub->eng_distance * eng,
-                         inv_req, hub_ip);
+       WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);

        /*
         * Issue a dummy read to wait for the ACK register to be cleared
@@ -279,14 +304,11 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
         */
        if ((vmhub == AMDGPU_GFXHUB(0)) &&
            (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
-               RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
-                                 hub->eng_distance * eng, hub_ip);
+               RREG32_RLC_NO_KIQ(req, hub_ip);

        /* Wait for ACK with a delay.*/
        for (i = 0; i < adev->usec_timeout; i++) {
-               tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
-                                 hub->eng_distance * eng, hub_ip);
-
+               tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
                tmp &= 1 << vmid;
                if (tmp)
                        break;
@@ -296,109 +318,12 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,

        /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
        if (use_semaphore)
-               /*
-                * add semaphore release after invalidation,
-                * write with 0 means semaphore release
-                */
-               WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
-                                 hub->eng_distance * eng, 0, hub_ip);
+               WREG32_RLC_NO_KIQ(sem, 0, hub_ip);

        spin_unlock(&adev->gmc.invalidate_lock);

-       if (i < adev->usec_timeout)
-               return;
-
-       DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
-}
-
-/**
- * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
- *
- * @adev: amdgpu_device pointer
- * @vmid: vm instance to flush
- * @vmhub: vmhub type
- * @flush_type: the flush type
- *
- * Flush the TLB for the requested page table.
- */
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
-                                       uint32_t vmhub, uint32_t flush_type)
-{
-       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-       struct dma_fence *fence;
-       struct amdgpu_job *job;
-
-       int r;
-
-       /* flush hdp cache */
-       adev->hdp.funcs->flush_hdp(adev, NULL);
-
-       /* For SRIOV run time, driver shouldn't access the register through MMIO
-        * Directly use kiq to do the vm invalidation instead
-        */
-       if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
-           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-           down_read_trylock(&adev->reset_domain->sem)) {
-               struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-               const unsigned int eng = 17;
-               u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
-               u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
-               u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
-
-               amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-                               1 << vmid);
-
-               up_read(&adev->reset_domain->sem);
-               return;
-       }
-
-       mutex_lock(&adev->mman.gtt_window_lock);
-
-       if (vmhub == AMDGPU_MMHUB0(0)) {
-               gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
-               mutex_unlock(&adev->mman.gtt_window_lock);
-               return;
-       }
-
-       BUG_ON(vmhub != AMDGPU_GFXHUB(0));
-
-       if (!adev->mman.buffer_funcs_enabled ||
-           !adev->ib_pool_ready ||
-           amdgpu_in_reset(adev) ||
-           ring->sched.ready == false) {
-               gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
-               mutex_unlock(&adev->mman.gtt_window_lock);
-               return;
-       }
-
-       /* The SDMA on Navi has a bug which can theoretically result in memory
-        * corruption if an invalidation happens at the same time as an VA
-        * translation. Avoid this by doing the invalidation from the SDMA
-        * itself.
-        */
-       r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
-                                    AMDGPU_FENCE_OWNER_UNDEFINED,
-                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
-                                    &job);
-       if (r)
-               goto error_alloc;
-
-       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
-       job->vm_needs_flush = true;
-       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
-       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
-       fence = amdgpu_job_submit(job);
-
-       mutex_unlock(&adev->mman.gtt_window_lock);
-
-       dma_fence_wait(fence, false);
-       dma_fence_put(fence);
-
-       return;
-
-error_alloc:
-       mutex_unlock(&adev->mman.gtt_window_lock);
-       DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+       if (i >= adev->usec_timeout)
+               DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
  }
/**
