This allows the driver to fall back to a pipe-level reset when the per-queue reset fails, which improves the chances of recovering hung compute or graphics rings.
V2: replace both gfx_v11_compute_pipe_reset_support() and gfx_v11_pipe_reset_support() with amdgpu_ring_is_reset_type_supported (Alex) Suggested-by: Alex Deucher <[email protected]> Signed-off-by: Jesse Zhang <[email protected]> --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index a25fc25279b1..72a7d40a9c61 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1851,6 +1851,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) !adev->debug_disable_gpu_ring_reset) { adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; } break; default: @@ -1858,6 +1859,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) !adev->debug_disable_gpu_ring_reset) { adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; } break; } @@ -6807,23 +6809,12 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } -static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) -{ - /* Disable the pipe reset until the CPFW fully support it.*/ - dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); - return false; -} - - static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; uint32_t reset_pipe = 0, clean_pipe = 0; int r; - if (!gfx_v11_pipe_reset_support(adev)) - return -EOPNOTSUPP; - gfx_v11_0_set_safe_mode(adev, 0); mutex_lock(&adev->srbm_mutex); soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -6884,6 +6875,9 @@ static int 
gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, if (r) { dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + if (!amdgpu_ring_is_reset_type_supported(ring, + AMDGPU_RESET_TYPE_PER_PIPE)) + return r; r = gfx_v11_reset_gfx_pipe(ring); if (r) return r; @@ -6942,9 +6936,6 @@ static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) uint32_t reset_val, clean_val; int r; - if (!gfx_v11_pipe_reset_support(adev)) - return -EOPNOTSUPP; - gfx_v11_0_set_safe_mode(adev, 0); mutex_lock(&adev->srbm_mutex); soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -7083,6 +7074,9 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0); if (r) { dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); + if (!amdgpu_ring_is_reset_type_supported(ring, + AMDGPU_RESET_TYPE_PER_PIPE)) + return r; amdgpu_gfx_mec_pipe_reset_prepare(adev, ring); r = gfx_v11_0_reset_compute_pipe(ring); if (r) { -- 2.49.0
