Currently, under SRIOV runtime, the KIQ is used to write HDP_MEM_FLUSH_CNTL
for the HDP flush. However, this register needs to be written by the CPU
for the NBIF to take notice; written through the KIQ, the flush has no
effect.

Add a KIQ ring callback to emit GPU_HDP_FLUSH instead, and use it in
amdgpu_device_flush_hdp when no ring is provided.
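
For reference, the resulting dispatch in amdgpu_device_flush_hdp() looks
roughly like this (an illustrative summary of the hunk below, not a
separate code path):

  if (ring && ring->funcs->emit_hdp_flush)
          amdgpu_ring_emit_hdp_flush(ring);  /* caller passed a ring: emit on it */
  else if (!ring && amdgpu_sriov_runtime(adev))
          amdgpu_kiq_hdp_flush(adev, 0);     /* SRIOV runtime, no ring: GPU_HDP_FLUSH via KIQ */
  else
          amdgpu_asic_flush_hdp(adev, ring); /* otherwise: asic flush_hdp (CPU register write) */

So callers that invoke amdgpu_device_flush_hdp(adev, NULL) under SRIOV
runtime now go through the KIQ packet path instead of a register write
issued from the KIQ.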

v2: remove changes to flush_hdp callback
v3: add mes fix

Signed-off-by: Victor Zhao <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 73 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h    |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c     |  5 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c     |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c    |  1 +
 9 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a77000c2e0bb..57d3ea33dec2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -7269,6 +7269,8 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
 
        if (ring && ring->funcs->emit_hdp_flush)
                amdgpu_ring_emit_hdp_flush(ring);
+       else if (!ring && amdgpu_sriov_runtime(adev))
+               amdgpu_kiq_hdp_flush(adev, 0);
        else
                amdgpu_asic_flush_hdp(adev, ring);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 7f02e36ccc1e..ecd7908590de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1194,6 +1194,78 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3
        dev_err(adev->dev, "failed to write reg:%x\n", reg);
 }
 
+void amdgpu_kiq_hdp_flush(struct amdgpu_device *adev, uint32_t xcc_id)
+{
+       signed long r, cnt = 0;
+       unsigned long flags;
+       uint32_t seq;
+       uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+       struct amdgpu_ring *ring = &kiq->ring;
+
+       BUG_ON(!ring->funcs->emit_hdp_flush);
+
+       if (amdgpu_device_skip_hw_access(adev))
+               return;
+
+       if (adev->enable_mes_kiq && adev->mes.ring[0].sched.ready) {
+       hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
+       hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev);
+       ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0; /* Use CP0 for KIQ */
+
+       amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset,
+                                             ref_and_mask, ref_and_mask);
+               return;
+       }
+
+       spin_lock_irqsave(&kiq->ring_lock, flags);
+       r = amdgpu_ring_alloc(ring, 32);
+       if (r)
+               goto failed_unlock;
+
+       amdgpu_ring_emit_hdp_flush(ring);
+       r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+       if (r)
+               goto failed_undo;
+
+       amdgpu_ring_commit(ring);
+       spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+       r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+       /* don't wait anymore for gpu reset case because this way may
+        * block gpu_recover() routine forever, e.g. this KIQ HDP flush
+        * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+        * never return if we keep waiting here, which causes
+        * gpu_recover() to hang.
+        *
+        * also don't wait anymore for IRQ context
+        */
+       if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+               goto failed_kiq_hdp_flush;
+
+       might_sleep();
+       while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+               if (amdgpu_in_reset(adev))
+                       goto failed_kiq_hdp_flush;
+
+               msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+               r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+       }
+
+       if (cnt > MAX_KIQ_REG_TRY)
+               goto failed_kiq_hdp_flush;
+
+       return;
+
+failed_undo:
+       amdgpu_ring_undo(ring);
+failed_unlock:
+       spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_hdp_flush:
+       dev_err(adev->dev, "failed to flush HDP via KIQ\n");
+}
+
 int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
 {
        if (amdgpu_num_kcq == -1) {
@@ -2484,3 +2556,4 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
                            &amdgpu_debugfs_compute_sched_mask_fops);
 #endif
 }
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index fb5f7a0ee029..5bccd2cc9518 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -615,6 +615,7 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
                                  struct amdgpu_iv_entry *entry);
 uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
 void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
+void amdgpu_kiq_hdp_flush(struct amdgpu_device *adev, uint32_t xcc_id);
 int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
 void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 8841d7213de4..751732f3e883 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -9951,6 +9951,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
        .emit_wreg = gfx_v10_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+       .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
 };
 
 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 66c47c466532..10d2219866f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -2438,7 +2438,7 @@ static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
                        if (version_minor == 3)
                                gfx_v11_0_load_rlcp_rlcv_microcode(adev);
                }
-               
+
                return 0;
        }
 
@@ -3886,7 +3886,7 @@ static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
        }
 
        memcpy(fw, fw_data, fw_size);
-       
+
        amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
        amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
 
@@ -7320,6 +7320,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
        .emit_wreg = gfx_v11_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+       .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
 };
 
 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 710ec9c34e43..e2bb8668150d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -5597,6 +5597,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
        .emit_wreg = gfx_v12_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+       .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
 };
 
 static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 0856ff65288c..d3d0a4b0380c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6939,6 +6939,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
+       .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
 };
 
 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index dd19a97436db..f1a2efc2a8d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -7586,6 +7586,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+       .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
 };
 
 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 77f9d5b9a556..b1fa4036befb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -4798,6 +4798,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = {
        .emit_wreg = gfx_v9_4_3_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+       .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush,
 };
 
 static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev)
-- 
2.25.1
