[Why]
Newer VPE microcode has functionality that will decrease DPM level
only when a workload has run for 2 or more seconds.  If VPE is turned
off before this DPM decrease, the SOC can get stuck with a higher
DPM level.

This can happen from amdgpu's ring buffer test because it's a short
quick workload for VPE and VPE is turned off after 1s.

[How]
In the idle handler, besides checking that fences are drained, also check
that the VPE DPM level really is at DPM0. If it is not, schedule the
delayed work again until it is.

Cc: [email protected]
Reported-by: Sultan Alsawaf <[email protected]>
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4615
Signed-off-by: Mario Limonciello <[email protected]>
---
v4:
 * only apply to Strix Halo (VPE 6.1.1)
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 31 ++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index 474bfe36c0c2..95d0badeb479 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -322,6 +322,26 @@ static int vpe_early_init(struct amdgpu_ip_block *ip_block)
        return 0;
 }
 
+/*
+ * vpe_wait_dpm0: whether to reschedule idle work waiting for IP to reach DPM0
+ */
+static int vpe_wait_dpm0(struct amdgpu_device *adev)
+{
+       struct amdgpu_vpe *vpe = &adev->vpe;
+
+       if (!adev->pm.dpm_enabled)
+               return 0;
+
+       switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+       case IP_VERSION(6, 1, 1):
+               break;
+       default:
+               return 0;
+       }
+
+       return RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_lv));
+}
+
 static void vpe_idle_work_handler(struct work_struct *work)
 {
        struct amdgpu_device *adev =
@@ -329,11 +349,16 @@ static void vpe_idle_work_handler(struct work_struct *work)
        unsigned int fences = 0;
 
        fences += amdgpu_fence_count_emitted(&adev->vpe.ring);
+       if (fences)
+               goto reschedule;
 
-       if (fences == 0)
+       if (!vpe_wait_dpm0(adev)) {
                amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
-       else
-               schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+               return;
+       }
+
+reschedule:
+       schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
 }
 
 static int vpe_common_init(struct amdgpu_vpe *vpe)
-- 
2.51.0

Reply via email to