From: Ray Wu <[email protected]>
[Why]
System hang observed during suspend/resume while video is playing.
amdgpu_dm_ism_disable() is called under dc_lock and waits for ISM
delayed work via disable_delayed_work_sync(). The work handlers
themselves take dc_lock, producing an ABBA deadlock when a worker is
in flight at suspend time.
[How]
Split the disable path into two phases with opposite locking
contracts:
1. amdgpu_dm_ism_disable() -- quiesces workers, must NOT hold
dc_lock.
2. amdgpu_dm_ism_force_full_power() (new) -- drives the ISM FSM
back to FULL_POWER_RUNNING, must hold dc_lock.
Reviewed-by: Sun peng (Leo) Li <[email protected]>
Signed-off-by: Ray Wu <[email protected]>
Signed-off-by: Ivan Lipski <[email protected]>
---
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 25 +++++++--
.../drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c | 56 ++++++++++++++++---
.../drm/amd/display/amdgpu_dm/amdgpu_dm_ism.h | 1 +
3 files changed, 70 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c41f017fe8f2..af0af7519517 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2327,9 +2327,16 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
adev->dm.idle_workqueue = NULL;
}
- /* Disable ISM before dc_destroy() invalidates dm->dc */
+ /*
+ * Disable ISM before dc_destroy() invalidates dm->dc.
+ *
+ * Quiesce workers first without dc_lock (they take dc_lock
+ * themselves, so syncing under it would deadlock), then drive the
+ * FSM back to FULL_POWER_RUNNING under dc_lock.
+ */
+ amdgpu_dm_ism_disable(&adev->dm);
scoped_guard(mutex, &adev->dm.dc_lock)
- amdgpu_dm_ism_disable(&adev->dm);
+ amdgpu_dm_ism_force_full_power(&adev->dm);
amdgpu_dm_destroy_drm_device(&adev->dm);
@@ -3362,9 +3369,14 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block)
if (amdgpu_in_reset(adev)) {
enum dc_status res;
+ /* Quiesce ISM workers before taking dc_lock (workers take
+ * dc_lock themselves; syncing under it would deadlock).
+ */
+ amdgpu_dm_ism_disable(dm);
+
mutex_lock(&dm->dc_lock);
- amdgpu_dm_ism_disable(dm);
+ amdgpu_dm_ism_force_full_power(dm);
dc_allow_idle_optimizations(adev->dm.dc, false);
dm->cached_dc_state =
dc_state_create_copy(dm->dc->current_state);
@@ -3398,8 +3410,13 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block)
amdgpu_dm_irq_suspend(adev);
+ /*
+ * Quiesce ISM workers before taking dc_lock (workers take dc_lock
+ * themselves; syncing under it would deadlock).
+ */
+ amdgpu_dm_ism_disable(dm);
scoped_guard(mutex, &dm->dc_lock)
- amdgpu_dm_ism_disable(dm);
+ amdgpu_dm_ism_force_full_power(dm);
hpd_rx_irq_work_suspend(dm);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c
index bc7db5e759d1..857c22007743 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.c
@@ -524,13 +524,20 @@ static void dm_ism_sso_delayed_work_func(struct
work_struct *work)
}
/**
- * amdgpu_dm_ism_disable - Disable the ISM
+ * amdgpu_dm_ism_disable - Quiesce ISM workers
*
* @dm: The amdgpu display manager
*
- * Disable the idle state manager by disabling any ISM work, canceling pending
- * work, and waiting for in-progress work to finish. After disabling, the
system
- * is left in DM_ISM_STATE_FULL_POWER_RUNNING state.
+ * Cancels and disables any pending or in-flight ISM delayed work and waits
+ * for in-progress work to finish. After this returns, no ISM worker can run
+ * and subsequent mod_delayed_work() calls become no-ops via
+ * clear_pending_if_disabled().
+ *
+ * Must NOT be called with dc_lock held: the workers themselves take dc_lock,
+ * so a synchronous wait under dc_lock would deadlock.
+ *
+ * The caller is responsible for driving the FSM back to FULL_POWER_RUNNING
+ * (under dc_lock) by calling amdgpu_dm_ism_force_full_power().
*/
void amdgpu_dm_ism_disable(struct amdgpu_display_manager *dm)
{
@@ -538,21 +545,54 @@ void amdgpu_dm_ism_disable(struct amdgpu_display_manager
*dm)
struct amdgpu_crtc *acrtc;
struct amdgpu_dm_ism *ism;
- ASSERT(mutex_is_locked(&dm->dc_lock));
+ /*
+ * Caller must NOT hold dc_lock: the ISM delayed work handlers
+ * acquire dc_lock themselves, so waiting for them via
+ * disable_delayed_work_sync() while holding dc_lock would
+ * self-deadlock against an in-flight worker.
+ */
+ lockdep_assert_not_held(&dm->dc_lock);
drm_for_each_crtc(crtc, dm->ddev) {
acrtc = to_amdgpu_crtc(crtc);
ism = &acrtc->ism;
- /* Cancel and disable any pending work */
disable_delayed_work_sync(&ism->delayed_work);
disable_delayed_work_sync(&ism->sso_delayed_work);
+ }
+}
+
+/**
+ * amdgpu_dm_ism_force_full_power - Force every CRTC's ISM FSM to FULL_POWER
+ *
+ * @dm: The amdgpu display manager
+ *
+ * Sends DM_ISM_EVENT_EXIT_IDLE_REQUESTED to every CRTC's ISM, leaving each
+ * FSM in FULL_POWER_RUNNING. Intended to be paired with
+ * amdgpu_dm_ism_disable(): callers should first quiesce workers (without
+ * dc_lock), then take dc_lock and call this helper.
+ *
+ * Must be called with dc_lock held.
+ */
+void amdgpu_dm_ism_force_full_power(struct amdgpu_display_manager *dm)
+{
+ struct drm_crtc *crtc;
+ struct amdgpu_crtc *acrtc;
+
+ /*
+ * Caller must hold dc_lock: commit_event() drives the FSM and
+ * may touch dc state via dc_allow_idle_optimizations() etc.
+ */
+ lockdep_assert_held(&dm->dc_lock);
+
+ drm_for_each_crtc(crtc, dm->ddev) {
+ acrtc = to_amdgpu_crtc(crtc);
/*
* When disabled, leave in FULL_POWER_RUNNING state.
- * EXIT_IDLE will not queue any work
+ * EXIT_IDLE will not queue any work.
*/
- amdgpu_dm_ism_commit_event(ism,
+ amdgpu_dm_ism_commit_event(&acrtc->ism,
DM_ISM_EVENT_EXIT_IDLE_REQUESTED);
}
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.h
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.h
index 4df6a82972a8..72e2dac49e55 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_ism.h
@@ -146,6 +146,7 @@ void amdgpu_dm_ism_fini(struct amdgpu_dm_ism *ism);
void amdgpu_dm_ism_commit_event(struct amdgpu_dm_ism *ism,
enum amdgpu_dm_ism_event event);
void amdgpu_dm_ism_disable(struct amdgpu_display_manager *dm);
+void amdgpu_dm_ism_force_full_power(struct amdgpu_display_manager *dm);
void amdgpu_dm_ism_enable(struct amdgpu_display_manager *dm);
#if IS_ENABLED(CONFIG_DRM_AMD_DC_KUNIT_TEST)
--
2.43.0