This allows the driver to fall back to pipe-level reset when per-queue reset fails, improving recovery success for hung compute or graphics rings.
V2: replace the gfx_v12_pipe_reset_support() checks in both the gfx and compute pipe reset paths with amdgpu_ring_is_reset_type_supported() (Alex) Suggested-by: Alex Deucher <[email protected]> Signed-off-by: Jesse Zhang <[email protected]> --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f90354e2ab3f..2dcdee1eef1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1557,6 +1557,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) !adev->debug_disable_gpu_ring_reset) { adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (adev->gfx.mec_fw_version >= 3190) + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; } break; default: @@ -5257,22 +5259,12 @@ static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block) amdgpu_gfx_off_ctrl(adev, true); } -static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev) -{ - /* Disable the pipe reset until the CPFW fully support it.*/ - dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); - return false; -} - static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; uint32_t reset_pipe = 0, clean_pipe = 0; int r; - if (!gfx_v12_pipe_reset_support(adev)) - return -EOPNOTSUPP; - gfx_v12_0_set_safe_mode(adev, 0); mutex_lock(&adev->srbm_mutex); soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -5333,6 +5325,10 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio, 0); if (r) { dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + if (!amdgpu_ring_is_reset_type_supported(ring, + AMDGPU_RESET_TYPE_PER_PIPE)) + return r; + r = 
gfx_v12_reset_gfx_pipe(ring); if (r) return r; @@ -5389,9 +5385,6 @@ static int gfx_v12_0_reset_compute_pipe(struct amdgpu_device *adev, uint32_t reset_val, clean_val; int r = 0; - if (!gfx_v12_pipe_reset_support(adev)) - return -EOPNOTSUPP; - gfx_v12_0_set_safe_mode(adev, 0); mutex_lock(&adev->srbm_mutex); soc24_grbm_select(adev, me, pipe, queue, 0); @@ -5485,6 +5478,10 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, if (r) { dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); reset_mode = AMDGPU_RESET_TYPE_PER_PIPE; + if (!amdgpu_ring_is_reset_type_supported(ring, + AMDGPU_RESET_TYPE_PER_PIPE)) + return r; + amdgpu_amdkfd_suspend(adev, true); r = amdgpu_gfx_mec_pipe_reset_run(adev, ring->xcc_id, ring->me, ring->pipe, -- 2.49.0
