On 1/24/2026 12:32 PM, Hamza Mahfooz wrote:
On Fri, Jan 23, 2026 at 04:30:12PM -0600, Mario Limonciello wrote:
Hamza - since you seem to have a "workload" that can run overnight and this
series recovers, can you try what Alex said and do a dc_suspend() and
dc_resume() for failure?

Make sure you log a message so you can know it worked.

Sure, I'll try something along the lines of:

Generally speaking that looks good, but I'll leave a few comments.


diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 492457c86393..bc7abd00f5f4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -579,11 +579,28 @@ amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc,
  }
  #endif

-static void amdgpu_dm_crtc_handle_timeout(struct drm_crtc *crtc)
+static void amdgpu_dm_crtc_handle_timeout(struct drm_crtc *crtc,
+                                         struct drm_crtc_commit *commit)
  {
        struct amdgpu_device *adev = drm_to_adev(crtc->dev);
        struct amdgpu_reset_context reset_ctx;
+       struct amdgpu_ip_block *ip_block;

+       ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE);
+       if (!ip_block)
+               goto full_reset;
+
+       ip_block->version->funcs->suspend(ip_block);
+       ip_block->version->funcs->resume(ip_block);

Both of these can fail. Especially considering the page flip timeout could be a DCN hang, I think you should check return code for both of them sequentially and jump to the full reset condition if either fails.

+
+       if (drm_crtc_commit_wait(commit)) {
+               drm_err(crtc->dev, "suspend-resume failed!\n");
+               goto full_reset;
+       }
+

At least to prove "this worked" you should log a message "right here" that the reset occurred and you recovered. That "might not" be in the final version, but I think it's worth having for now.

+       return;
+
+full_reset:
        if (amdgpu_device_should_recover_gpu(adev)) {
                memset(&reset_ctx, 0, sizeof(reset_ctx));

diff --git a/drivers/gpu/drm/drm_atomic_helper.c 
b/drivers/gpu/drm/drm_atomic_helper.c
index 7175294ccb57..b38c4ee2fc95 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1961,7 +1961,7 @@ void drm_atomic_helper_wait_for_flip_done(struct 
drm_device *dev,
                                crtc->base.id, crtc->name);

                        if (crtc->funcs->page_flip_timeout)
-                               crtc->funcs->page_flip_timeout(crtc);
+                               crtc->funcs->page_flip_timeout(crtc, commit);
                }
        }

diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 45dc5a76e915..47a34a05f6de 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -616,7 +616,8 @@ struct drm_crtc_funcs {
         * and can be used by drivers to attempt to recover from a page flip
         * timeout.
         */
-       void (*page_flip_timeout)(struct drm_crtc *crtc);
+       void (*page_flip_timeout)(struct drm_crtc *crtc,
+                                 struct drm_crtc_commit *commit);

        /**
         * @set_property:

Reply via email to