On Wed, May 20, 2020 at 6:40 AM Evan Quan <evan.q...@amd.com> wrote: > > The prompts will contain pci address(segment/bus/port/function), > severity(warn or error) and some keywords(GPU, amdgpu). Also this > addresses the issue that pci bus retrieved by PCI_BUS_NUM(adev->pdev->devfn) > is wrong. > > Change-Id: I714d1dffb30a6cf76dcede087cf5d9302f683ed8 > Signed-off-by: Evan Quan <evan.q...@amd.com>
Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> > --- > .../gpu/drm/amd/powerplay/hwmgr/smu_helper.c | 38 +++++-------------- > drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 26 ++++--------- > 2 files changed, 17 insertions(+), 47 deletions(-) > > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c > b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c > index 4279f95ba779..60b5ca974356 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c > @@ -597,58 +597,40 @@ int phm_irq_process(struct amdgpu_device *adev, > > if (client_id == AMDGPU_IRQ_CLIENTID_LEGACY) { > if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_LOW_TO_HIGH) > { > - pr_warn("GPU over temperature range detected on PCIe > %d:%d.%d!\n", > - > PCI_BUS_NUM(adev->pdev->devfn), > - PCI_SLOT(adev->pdev->devfn), > - PCI_FUNC(adev->pdev->devfn)); > + dev_emerg(adev->dev, "ERROR: GPU over temperature > range(SW CTF) detected!\n"); > /* > * SW CTF just occurred. > * Try to do a graceful shutdown to prevent further > damage. > */ > - dev_emerg(adev->dev, "System is going to shutdown due > to SW CTF!\n"); > + dev_emerg(adev->dev, "ERROR: System is going to > shutdown due to GPU SW CTF!\n"); > orderly_poweroff(true); > } else if (src_id == > VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW) > - pr_warn("GPU under temperature range detected on PCIe > %d:%d.%d!\n", > - PCI_BUS_NUM(adev->pdev->devfn), > - PCI_SLOT(adev->pdev->devfn), > - PCI_FUNC(adev->pdev->devfn)); > + dev_emerg(adev->dev, "ERROR: GPU under temperature > range detected!\n"); > else if (src_id == VISLANDS30_IV_SRCID_GPIO_19) { > - pr_warn("GPU Critical Temperature Fault detected on > PCIe %d:%d.%d!\n", > - PCI_BUS_NUM(adev->pdev->devfn), > - PCI_SLOT(adev->pdev->devfn), > - PCI_FUNC(adev->pdev->devfn)); > + dev_emerg(adev->dev, "ERROR: GPU HW Critical > Temperature Fault(aka CTF) detected!\n"); > /* > * HW CTF just occurred. Shutdown to prevent further > damage. 
> */ > - dev_emerg(adev->dev, "System is going to shutdown due > to HW CTF!\n"); > + dev_emerg(adev->dev, "ERROR: System is going to > shutdown due to GPU HW CTF!\n"); > orderly_poweroff(true); > } > } else if (client_id == SOC15_IH_CLIENTID_THM) { > if (src_id == 0) { > - pr_warn("GPU over temperature range detected on PCIe > %d:%d.%d!\n", > - > PCI_BUS_NUM(adev->pdev->devfn), > - PCI_SLOT(adev->pdev->devfn), > - PCI_FUNC(adev->pdev->devfn)); > + dev_emerg(adev->dev, "ERROR: GPU over temperature > range(SW CTF) detected!\n"); > /* > * SW CTF just occurred. > * Try to do a graceful shutdown to prevent further > damage. > */ > - dev_emerg(adev->dev, "System is going to shutdown due > to SW CTF!\n"); > + dev_emerg(adev->dev, "ERROR: System is going to > shutdown due to GPU SW CTF!\n"); > orderly_poweroff(true); > } else > - pr_warn("GPU under temperature range detected on PCIe > %d:%d.%d!\n", > - PCI_BUS_NUM(adev->pdev->devfn), > - PCI_SLOT(adev->pdev->devfn), > - PCI_FUNC(adev->pdev->devfn)); > + dev_emerg(adev->dev, "ERROR: GPU under temperature > range detected!\n"); > } else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) { > - pr_warn("GPU Critical Temperature Fault detected on PCIe > %d:%d.%d!\n", > - PCI_BUS_NUM(adev->pdev->devfn), > - PCI_SLOT(adev->pdev->devfn), > - PCI_FUNC(adev->pdev->devfn)); > + dev_emerg(adev->dev, "ERROR: GPU HW Critical Temperature > Fault(aka CTF) detected!\n"); > /* > * HW CTF just occurred. Shutdown to prevent further damage. 
> */ > - dev_emerg(adev->dev, "System is going to shutdown due to HW > CTF!\n"); > + dev_emerg(adev->dev, "ERROR: System is going to shutdown due > to GPU HW CTF!\n"); > orderly_poweroff(true); > } > > diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c > b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c > index c1ba77344107..f56789f8ec11 100644 > --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c > +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c > @@ -1540,40 +1540,28 @@ static int smu_v11_0_irq_process(struct amdgpu_device > *adev, > if (client_id == SOC15_IH_CLIENTID_THM) { > switch (src_id) { > case THM_11_0__SRCID__THM_DIG_THERM_L2H: > - pr_warn("GPU over temperature range detected on PCIe > %d:%d.%d!\n", > - PCI_BUS_NUM(adev->pdev->devfn), > - PCI_SLOT(adev->pdev->devfn), > - PCI_FUNC(adev->pdev->devfn)); > + dev_emerg(adev->dev, "ERROR: GPU over temperature > range(SW CTF) detected!\n"); > /* > * SW CTF just occurred. > * Try to do a graceful shutdown to prevent further > damage. > */ > - dev_emerg(adev->dev, "System is going to shutdown due > to SW CTF!\n"); > + dev_emerg(adev->dev, "ERROR: System is going to > shutdown due to GPU SW CTF!\n"); > orderly_poweroff(true); > break; > case THM_11_0__SRCID__THM_DIG_THERM_H2L: > - pr_warn("GPU under temperature range detected on PCIe > %d:%d.%d!\n", > - PCI_BUS_NUM(adev->pdev->devfn), > - PCI_SLOT(adev->pdev->devfn), > - PCI_FUNC(adev->pdev->devfn)); > + dev_emerg(adev->dev, "ERROR: GPU under temperature > range detected\n"); > break; > default: > - pr_warn("GPU under temperature range unknown src id > (%d), detected on PCIe %d:%d.%d!\n", > - src_id, > - PCI_BUS_NUM(adev->pdev->devfn), > - PCI_SLOT(adev->pdev->devfn), > - PCI_FUNC(adev->pdev->devfn)); > + dev_emerg(adev->dev, "ERROR: GPU under temperature > range unknown src id (%d)\n", > + src_id); > break; > } > } else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) { > - pr_warn("GPU Critical Temperature Fault detected on PCIe > %d:%d.%d!\n", > - 
PCI_BUS_NUM(adev->pdev->devfn), > - PCI_SLOT(adev->pdev->devfn), > - PCI_FUNC(adev->pdev->devfn)); > + dev_emerg(adev->dev, "ERROR: GPU HW Critical Temperature > Fault(aka CTF) detected!\n"); > /* > * HW CTF just occurred. Shutdown to prevent further damage. > */ > - dev_emerg(adev->dev, "System is going to shutdown due to HW > CTF!\n"); > + dev_emerg(adev->dev, "ERROR: System is going to shutdown due > to GPU HW CTF!\n"); > orderly_poweroff(true); > } else if (client_id == SOC15_IH_CLIENTID_MP1) { > if (src_id == 0xfe) { > -- > 2.26.2 > > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx