This allows the driver to fall back to pipe-level reset when per-queue
reset fails, improving recovery success for hung compute or graphics
rings.
V2: replace the gfx_v12_pipe_reset_support() checks in both the gfx and
compute pipe reset paths with amdgpu_ring_is_reset_type_supported()
(Alex)
Suggested-by: Alex Deucher <[email protected]>
Signed-off-by: Jesse Zhang <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 20 +++++++-------------
1 file changed, 7 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 00cbd1c09cd2..34a2ccabacf0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -1563,6 +1563,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |=
AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.compute_supported_reset |=
AMDGPU_RESET_TYPE_PER_PIPE;
adev->gfx.gfx_supported_reset |=
AMDGPU_RESET_TYPE_PER_QUEUE;
}
break;
@@ -5264,22 +5265,12 @@ static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block)
amdgpu_gfx_off_ctrl(adev, true);
}
-static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev)
-{
- /* Disable the pipe reset until the CPFW fully support it.*/
- dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
- return false;
-}
-
static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t reset_pipe = 0, clean_pipe = 0;
int r;
- if (!gfx_v12_pipe_reset_support(adev))
- return -EOPNOTSUPP;
-
gfx_v12_0_set_safe_mode(adev, 0);
mutex_lock(&adev->srbm_mutex);
soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -5340,6 +5331,9 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio, 0);
if (r) {
dev_warn(adev->dev, "reset via MES failed and try pipe reset
%d\n", r);
+ if (!amdgpu_ring_is_reset_type_supported(ring,
+
AMDGPU_RESET_TYPE_PER_PIPE))
+ return r;
r = gfx_v12_reset_gfx_pipe(ring);
if (r)
return r;
@@ -5396,9 +5390,6 @@ static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring)
uint32_t reset_val, clean_val;
int r = 0;
- if (!gfx_v12_pipe_reset_support(adev))
- return -EOPNOTSUPP;
-
gfx_v12_0_set_safe_mode(adev, 0);
mutex_lock(&adev->srbm_mutex);
soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -5488,6 +5479,9 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
if (r) {
dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe
reset\n", r);
+ if (!amdgpu_ring_is_reset_type_supported(ring,
+
AMDGPU_RESET_TYPE_PER_PIPE))
+ return r;
amdgpu_gfx_mec_pipe_reset_prepare(adev, ring);
r = gfx_v12_0_reset_compute_pipe(ring);
if (r) {
--
2.49.0