On Tue, 20 Jan 2026 at 22:15, Li Chen <[email protected]> wrote: > > Kexec reboot does not reset PCI devices. > Invoking the full DRM/TTM teardown from ->shutdown can trigger WARNs when > userspace still holds DRM file descriptors. > > Quiesce the GPU through the suspend path and then power down the PCI > function so the next kernel can re-initialize the device from a consistent > state. > > WARNING: drivers/gpu/drm/drm_mode_config.c:578 at > drm_mode_config_cleanup+0x2e7/0x300, CPU#2: kexec/1300 > Call Trace: > <TASK> > ? srso_return_thunk+0x5/0x5f > ? enable_work+0x3a/0x100 > nouveau_display_destroy+0x39/0x70 [nouveau > c19e0da7fd83583a023f855c510d9a3903808734] > nouveau_drm_device_fini+0x7b/0x1f0 [nouveau > c19e0da7fd83583a023f855c510d9a3903808734] > nouveau_drm_shutdown+0x52/0xc0 [nouveau > c19e0da7fd83583a023f855c510d9a3903808734] > pci_device_shutdown+0x35/0x60 > device_shutdown+0x11c/0x1b0 > kernel_kexec+0x13a/0x160 > __do_sys_reboot+0x209/0x240 > do_syscall_64+0x81/0x610 > ? srso_return_thunk+0x5/0x5f > ? __rtnl_unlock+0x37/0x70 > ? srso_return_thunk+0x5/0x5f > ? netdev_run_todo+0x63/0x570 > ? netif_change_flags+0x54/0x70 > ? srso_return_thunk+0x5/0x5f > ? devinet_ioctl+0x1e5/0x790 > ? srso_return_thunk+0x5/0x5f > ? inet_ioctl+0x1e9/0x200 > ? srso_return_thunk+0x5/0x5f > ? srso_return_thunk+0x5/0x5f > ? sock_do_ioctl+0x7d/0x130 > ? srso_return_thunk+0x5/0x5f > ? __x64_sys_ioctl+0x97/0xe0 > ? srso_return_thunk+0x5/0x5f > ? srso_return_thunk+0x5/0x5f > ? do_syscall_64+0x23b/0x610 > ? srso_return_thunk+0x5/0x5f > ? put_user_ifreq+0x7a/0x90 > ? srso_return_thunk+0x5/0x5f > ? sock_do_ioctl+0x107/0x130 > ? srso_return_thunk+0x5/0x5f > ? __x64_sys_ioctl+0x97/0xe0 > ? srso_return_thunk+0x5/0x5f > ? do_syscall_64+0x81/0x610 > ? srso_return_thunk+0x5/0x5f > ? 
exc_page_fault+0x7e/0x1a0 > entry_SYSCALL_64_after_hwframe+0x76/0x7e > > nouveau 0000:26:00.0: [drm] drm_WARN_ON(!list_empty(&fb->filp_head)) > WARNING: drivers/gpu/drm/drm_framebuffer.c:833 at > drm_framebuffer_free+0x73/0xa0, CPU#2: kexec/1300 > Call Trace: > <TASK> > drm_mode_config_cleanup+0x248/0x300 > ? __pfx___drm_printfn_dbg+0x10/0x10 > ? drm_mode_config_cleanup+0x1dc/0x300 > nouveau_display_destroy+0x39/0x70 [nouveau > c19e0da7fd83583a023f855c510d9a3903808734] > nouveau_drm_device_fini+0x7b/0x1f0 [nouveau > c19e0da7fd83583a023f855c510d9a3903808734] > nouveau_drm_shutdown+0x52/0xc0 [nouveau > c19e0da7fd83583a023f855c510d9a3903808734] > pci_device_shutdown+0x35/0x60 > device_shutdown+0x11c/0x1b0 > kernel_kexec+0x13a/0x160 > __do_sys_reboot+0x209/0x240 > do_syscall_64+0x81/0x610 > ? srso_return_thunk+0x5/0x5f > ? __rtnl_unlock+0x37/0x70 > ? srso_return_thunk+0x5/0x5f > ? netdev_run_todo+0x63/0x570 > ? netif_change_flags+0x54/0x70 > ? srso_return_thunk+0x5/0x5f > ? devinet_ioctl+0x1e5/0x790 > ? srso_return_thunk+0x5/0x5f > ? inet_ioctl+0x1e9/0x200 > ? srso_return_thunk+0x5/0x5f > ? srso_return_thunk+0x5/0x5f > ? sock_do_ioctl+0x7d/0x130 > ? srso_return_thunk+0x5/0x5f > ? __x64_sys_ioctl+0x97/0xe0 > ? srso_return_thunk+0x5/0x5f > ? srso_return_thunk+0x5/0x5f > ? do_syscall_64+0x23b/0x610 > ? srso_return_thunk+0x5/0x5f > ? put_user_ifreq+0x7a/0x90 > ? srso_return_thunk+0x5/0x5f > ? sock_do_ioctl+0x107/0x130 > ? srso_return_thunk+0x5/0x5f > ? __x64_sys_ioctl+0x97/0xe0 > ? srso_return_thunk+0x5/0x5f > ? do_syscall_64+0x81/0x610 > ? srso_return_thunk+0x5/0x5f > ? 
exc_page_fault+0x7e/0x1a0 > entry_SYSCALL_64_after_hwframe+0x76/0x7e > > WARNING: include/drm/ttm/ttm_resource.h:406 at nouveau_ttm_fini+0x257/0x270 > [nouveau], CPU#2: kexec/1300 > Call Trace: > <TASK> > nouveau_drm_device_fini+0x93/0x1f0 [nouveau > c19e0da7fd83583a023f855c510d9a3903808734] > nouveau_drm_shutdown+0x52/0xc0 [nouveau > c19e0da7fd83583a023f855c510d9a3903808734] > pci_device_shutdown+0x35/0x60 > device_shutdown+0x11c/0x1b0 > kernel_kexec+0x13a/0x160 > __do_sys_reboot+0x209/0x240 > do_syscall_64+0x81/0x610 > ? srso_return_thunk+0x5/0x5f > ? __rtnl_unlock+0x37/0x70 > ? srso_return_thunk+0x5/0x5f > ? netdev_run_todo+0x63/0x570 > ? netif_change_flags+0x54/0x70 > ? srso_return_thunk+0x5/0x5f > ? devinet_ioctl+0x1e5/0x790 > ? srso_return_thunk+0x5/0x5f > ? inet_ioctl+0x1e9/0x200 > ? srso_return_thunk+0x5/0x5f > ? srso_return_thunk+0x5/0x5f > ? sock_do_ioctl+0x7d/0x130 > ? srso_return_thunk+0x5/0x5f > ? __x64_sys_ioctl+0x97/0xe0 > ? srso_return_thunk+0x5/0x5f > ? srso_return_thunk+0x5/0x5f > ? do_syscall_64+0x23b/0x610 > ? srso_return_thunk+0x5/0x5f > ? put_user_ifreq+0x7a/0x90 > ? srso_return_thunk+0x5/0x5f > ? sock_do_ioctl+0x107/0x130 > ? srso_return_thunk+0x5/0x5f > ? __x64_sys_ioctl+0x97/0xe0 > ? srso_return_thunk+0x5/0x5f > ? do_syscall_64+0x81/0x610 > ? srso_return_thunk+0x5/0x5f > ? 
exc_page_fault+0x7e/0x1a0 > entry_SYSCALL_64_after_hwframe+0x76/0x7e > > Signed-off-by: Li Chen <[email protected]> > --- > drivers/gpu/drm/nouveau/nouveau_drm.c | 24 ++++++++++++++++++++++++ > 1 file changed, 24 insertions(+) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c > b/drivers/gpu/drm/nouveau/nouveau_drm.c > index 1527b801f013..50384462723b 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_drm.c > +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c > @@ -1079,6 +1079,29 @@ nouveau_pmops_resume(struct device *dev) > return ret; > } > > +static void > +nouveau_drm_shutdown(struct pci_dev *pdev) > +{ > + struct nouveau_drm *drm = pci_get_drvdata(pdev); > + int ret; > + > + if (!drm) > + return; > + > + if (drm->dev->switch_power_state == DRM_SWITCH_POWER_OFF || > + drm->dev->switch_power_state == DRM_SWITCH_POWER_DYNAMIC_OFF) > + return; > + > + ret = nouveau_do_suspend(drm, false); > + if (ret) > + NV_ERROR(drm, "shutdown suspend failed with: %d\n", ret); > + > + pci_save_state(pdev); > + pci_disable_device(pdev); > + pci_set_power_state(pdev, PCI_D3hot); > + usleep_range(200, 400);\
Why is this usleep_range() needed? It at least needs a comment. Dave.
