From: Ray Wu <[email protected]>

[Why]
System hang observed during suspend/resume while video is playing.
amdgpu_dm_ism_disable() is called under dc_lock and waits for ISM
delayed work via disable_delayed_work_sync(). The work handlers
themselves take dc_lock, producing an ABBA deadlock when a worker is
in flight at suspend time.

[How]
Split the disable path into two phases with opposite locking
contracts:
  1. amdgpu_dm_ism_disable() -- quiesces workers, must NOT hold
     dc_lock.
  2. amdgpu_dm_ism_force_full_power() (new) -- drives the ISM FSM
     back to FULL_POWER_RUNNING, must hold dc_lock.

Reviewed-by: Sun peng (Leo) Li <[email protected]>
Signed-off-by: Ray Wu <[email protected]>
Signed-off-by: Ivan Lipski <[email protected]>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 25 +++++++--
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c | 56 ++++++++++++++++---
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_ism.h |  1 +
 3 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c41f017fe8f2..af0af7519517 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2327,9 +2327,16 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
                adev->dm.idle_workqueue = NULL;
        }
 
-       /* Disable ISM before dc_destroy() invalidates dm->dc */
+       /*
+        * Disable ISM before dc_destroy() invalidates dm->dc.
+        *
+        * Quiesce workers first without dc_lock (they take dc_lock
+        * themselves, so syncing under it would deadlock), then drive the
+        * FSM back to FULL_POWER_RUNNING under dc_lock.
+        */
+       amdgpu_dm_ism_disable(&adev->dm);
        scoped_guard(mutex, &adev->dm.dc_lock)
-               amdgpu_dm_ism_disable(&adev->dm);
+               amdgpu_dm_ism_force_full_power(&adev->dm);
 
        amdgpu_dm_destroy_drm_device(&adev->dm);
 
@@ -3362,9 +3369,14 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block)
        if (amdgpu_in_reset(adev)) {
                enum dc_status res;
 
+               /* Quiesce ISM workers before taking dc_lock (workers take
+                * dc_lock themselves; syncing under it would deadlock).
+                */
+               amdgpu_dm_ism_disable(dm);
+
                mutex_lock(&dm->dc_lock);
 
-               amdgpu_dm_ism_disable(dm);
+               amdgpu_dm_ism_force_full_power(dm);
                dc_allow_idle_optimizations(adev->dm.dc, false);
 
                dm->cached_dc_state = 
dc_state_create_copy(dm->dc->current_state);
@@ -3398,8 +3410,13 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block)
 
        amdgpu_dm_irq_suspend(adev);
 
+       /*
+        * Quiesce ISM workers before taking dc_lock (workers take dc_lock
+        * themselves; syncing under it would deadlock).
+        */
+       amdgpu_dm_ism_disable(dm);
        scoped_guard(mutex, &dm->dc_lock)
-               amdgpu_dm_ism_disable(dm);
+               amdgpu_dm_ism_force_full_power(dm);
 
        hpd_rx_irq_work_suspend(dm);
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c
index bc7db5e759d1..857c22007743 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c
@@ -524,13 +524,20 @@ static void dm_ism_sso_delayed_work_func(struct 
work_struct *work)
 }
 
 /**
- * amdgpu_dm_ism_disable - Disable the ISM
+ * amdgpu_dm_ism_disable - Quiesce ISM workers
  *
  * @dm: The amdgpu display manager
  *
- * Disable the idle state manager by disabling any ISM work, canceling pending
- * work, and waiting for in-progress work to finish. After disabling, the 
system
- * is left in DM_ISM_STATE_FULL_POWER_RUNNING state.
+ * Cancels and disables any pending or in-flight ISM delayed work and waits
+ * for in-progress work to finish. After this returns, no ISM worker can run
+ * and subsequent mod_delayed_work() calls become no-ops via
+ * clear_pending_if_disabled().
+ *
+ * Must NOT be called with dc_lock held: the workers themselves take dc_lock,
+ * so a synchronous wait under dc_lock would deadlock.
+ *
+ * The caller is responsible for driving the FSM back to FULL_POWER_RUNNING
+ * (under dc_lock) by calling amdgpu_dm_ism_force_full_power().
  */
 void amdgpu_dm_ism_disable(struct amdgpu_display_manager *dm)
 {
@@ -538,21 +545,54 @@ void amdgpu_dm_ism_disable(struct amdgpu_display_manager 
*dm)
        struct amdgpu_crtc *acrtc;
        struct amdgpu_dm_ism *ism;
 
-       ASSERT(mutex_is_locked(&dm->dc_lock));
+       /*
+        * Caller must NOT hold dc_lock: the ISM delayed work handlers
+        * acquire dc_lock themselves, so waiting for them via
+        * disable_delayed_work_sync() while holding dc_lock would
+        * self-deadlock against an in-flight worker.
+        */
+       lockdep_assert_not_held(&dm->dc_lock);
 
        drm_for_each_crtc(crtc, dm->ddev) {
                acrtc = to_amdgpu_crtc(crtc);
                ism = &acrtc->ism;
 
-               /* Cancel and disable any pending work */
                disable_delayed_work_sync(&ism->delayed_work);
                disable_delayed_work_sync(&ism->sso_delayed_work);
+       }
+}
+
+/**
+ * amdgpu_dm_ism_force_full_power - Force every CRTC's ISM FSM to FULL_POWER
+ *
+ * @dm: The amdgpu display manager
+ *
+ * Sends DM_ISM_EVENT_EXIT_IDLE_REQUESTED to every CRTC's ISM, leaving each
+ * FSM in FULL_POWER_RUNNING. Intended to be paired with
+ * amdgpu_dm_ism_disable(): callers should first quiesce workers (without
+ * dc_lock), then take dc_lock and call this helper.
+ *
+ * Must be called with dc_lock held.
+ */
+void amdgpu_dm_ism_force_full_power(struct amdgpu_display_manager *dm)
+{
+       struct drm_crtc *crtc;
+       struct amdgpu_crtc *acrtc;
+
+       /*
+        * Caller must hold dc_lock: commit_event() drives the FSM and
+        * may touch dc state via dc_allow_idle_optimizations() etc.
+        */
+       lockdep_assert_held(&dm->dc_lock);
+
+       drm_for_each_crtc(crtc, dm->ddev) {
+               acrtc = to_amdgpu_crtc(crtc);
 
                /*
                 * When disabled, leave in FULL_POWER_RUNNING state.
-                * EXIT_IDLE will not queue any work
+                * EXIT_IDLE will not queue any work.
                 */
-               amdgpu_dm_ism_commit_event(ism,
+               amdgpu_dm_ism_commit_event(&acrtc->ism,
                                           DM_ISM_EVENT_EXIT_IDLE_REQUESTED);
        }
 }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.h
index 4df6a82972a8..72e2dac49e55 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.h
@@ -146,6 +146,7 @@ void amdgpu_dm_ism_fini(struct amdgpu_dm_ism *ism);
 void amdgpu_dm_ism_commit_event(struct amdgpu_dm_ism *ism,
                                enum amdgpu_dm_ism_event event);
 void amdgpu_dm_ism_disable(struct amdgpu_display_manager *dm);
+void amdgpu_dm_ism_force_full_power(struct amdgpu_display_manager *dm);
 void amdgpu_dm_ism_enable(struct amdgpu_display_manager *dm);
 
 #if IS_ENABLED(CONFIG_DRM_AMD_DC_KUNIT_TEST)
-- 
2.43.0

Reply via email to