This allows the driver to fall back to pipe-level reset when per-queue
reset fails, improving recovery success for hung compute or graphics
rings.

V2: Replace both gfx_v11_compute_pipe_reset_support() and
gfx_v11_pipe_reset_support() with
amdgpu_ring_is_reset_type_supported() (Alex)

Suggested-by: Alex Deucher <[email protected]>
Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a25fc25279b1..72a7d40a9c61 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1851,6 +1851,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
                    !adev->debug_disable_gpu_ring_reset) {
                        adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                        adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+                       adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
                }
                break;
        default:
@@ -1858,6 +1859,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
                    !adev->debug_disable_gpu_ring_reset) {
                        adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                        adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+                       adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
                }
                break;
        }
@@ -6807,23 +6809,12 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
        amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
 }
 
-static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
-{
-       /* Disable the pipe reset until the CPFW fully support it.*/
-       dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
-       return false;
-}
-
-
 static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
        uint32_t reset_pipe = 0, clean_pipe = 0;
        int r;
 
-       if (!gfx_v11_pipe_reset_support(adev))
-               return -EOPNOTSUPP;
-
        gfx_v11_0_set_safe_mode(adev, 0);
        mutex_lock(&adev->srbm_mutex);
        soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -6884,6 +6875,9 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
        if (r) {
 
                dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
+               if (!amdgpu_ring_is_reset_type_supported(ring,
+                                                        AMDGPU_RESET_TYPE_PER_PIPE))
+                       return r;
                r = gfx_v11_reset_gfx_pipe(ring);
                if (r)
                        return r;
@@ -6942,9 +6936,6 @@ static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
        uint32_t reset_val, clean_val;
        int r;
 
-       if (!gfx_v11_pipe_reset_support(adev))
-               return -EOPNOTSUPP;
-
        gfx_v11_0_set_safe_mode(adev, 0);
        mutex_lock(&adev->srbm_mutex);
        soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -7083,6 +7074,9 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
        r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
        if (r) {
                dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+               if (!amdgpu_ring_is_reset_type_supported(ring,
+                                                        AMDGPU_RESET_TYPE_PER_PIPE))
+                       return r;
                amdgpu_gfx_mec_pipe_reset_prepare(adev, ring);
                r = gfx_v11_0_reset_compute_pipe(ring);
                if (r) {
-- 
2.49.0

Reply via email to