On 2015/9/27 0:46, Borislav Petkov wrote: > On Wed, Sep 23, 2015 at 06:18:39PM +0200, Borislav Petkov wrote: >> On Wed, Sep 23, 2015 at 06:06:21PM +0200, Borislav Petkov wrote: >>> On Wed, Sep 23, 2015 at 04:44:50PM +0200, Daniel Vetter wrote: >>>> sorry I sprinkled the locking stuff in the wrong places. Still confused >>>> why the resume side doesn't blow up anywhere >>> >>> But it does: <snit> > > Ok, I bisected it. > > First of all, Daniel, you didn't see the resume side blow up because > of the NULL ptr deref f*cking up the box much earlier. Once I reverted > the bad commit by hand (it wouldn't revert cleanly) the resume splats > showed. > > And in talking about the bad commit, it is this one: > > 991de2e59090e55c65a7f59a049142e3c480f7bd is the first bad commit > commit 991de2e59090e55c65a7f59a049142e3c480f7bd > Author: Jiang Liu <jiang....@linux.intel.com> > Date: Wed Jun 10 16:54:59 2015 +0800 > > PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq() > > To support IOAPIC hotplug, we need to allocate PCI IRQ resources on demand > and free them when not used anymore. > > Implement pcibios_alloc_irq() and pcibios_free_irq() to dynamically > allocate and free PCI IRQs. > > Remove mp_should_keep_irq(), which is no longer used. > > [bhelgaas: changelog] > Signed-off-by: Jiang Liu <jiang....@linux.intel.com> > Signed-off-by: Bjorn Helgaas <bhelg...@google.com> > Acked-by: Thomas Gleixner <t...@linutronix.de> > > :040000 040000 765e2d5232d53247ec260b34b51589c3bccb36ae > f680234a27685e94b1a35ae2a7218f8eafa9071a M arch > :040000 040000 d55a682bcde72682e883365e88ad1df6186fd54d > f82c470a04a6845fcf5e0aa934512c75628f798d M drivers > > Jiang, you have to stop breaking my box with your changes. This is > maybe the third time I'm bisecting fallout from your patches. If you're > touching all x86, you need to test on an AMD box too. Like everyone else > testing on the hardware their changes affect. It is that simple. Hi Boris and Daniel, Sorry for the regression! I have tried to reproduce the regression by doing suspend/resume with a laptop, but failed. The PCI MSI suspend/resume code work as expected. And I have checked msi.c and radeon driver, but haven't gotten any hint about the cause. So could you please help to apply the attached debug patch to gather more information about the regression? Thanks! Gerry
> > Anyway, reverting that commit by hand fixes my resume splat. > > Here's the partial revert I did by hand: > > --- > diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h > index fa1195dae425..164e3f8d3c3d 100644 > --- a/arch/x86/include/asm/pci_x86.h > +++ b/arch/x86/include/asm/pci_x86.h > @@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock; > extern int (*pcibios_enable_irq)(struct pci_dev *dev); > extern void (*pcibios_disable_irq)(struct pci_dev *dev); > > +extern bool mp_should_keep_irq(struct device *dev); > + > struct pci_raw_ops { > int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, > int reg, int len, u32 *val); > diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c > index 09d3afc0a181..3bff24438b00 100644 > --- a/arch/x86/pci/common.c > +++ b/arch/x86/pci/common.c > @@ -672,20 +672,22 @@ int pcibios_add_device(struct pci_dev *dev) > return 0; > } > > -int pcibios_alloc_irq(struct pci_dev *dev) > +int pcibios_enable_device(struct pci_dev *dev, int mask) > { > - return pcibios_enable_irq(dev); > -} > + int err; > > -void pcibios_free_irq(struct pci_dev *dev) > -{ > - if (pcibios_disable_irq) > - pcibios_disable_irq(dev); > + if ((err = pci_enable_resources(dev, mask)) < 0) > + return err; > + > + if (!pci_dev_msi_enabled(dev)) > + return pcibios_enable_irq(dev); > + return 0; > } > > -int pcibios_enable_device(struct pci_dev *dev, int mask) > +void pcibios_disable_device (struct pci_dev *dev) > { > - return pci_enable_resources(dev, mask); > + if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq) > + pcibios_disable_irq(dev); > } > > int pci_ext_cfg_avail(void) > diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c > index 32e70343e6fd..f229834b36d4 100644 > --- a/arch/x86/pci/irq.c > +++ b/arch/x86/pci/irq.c > @@ -1186,6 +1186,18 @@ void pcibios_penalize_isa_irq(int irq, int active) > pirq_penalize_isa_irq(irq, active); > } > > +bool mp_should_keep_irq(struct device *dev) > +{ > + if (dev->power.is_prepared) > + return true; > +#ifdef CONFIG_PM > + if (dev->power.runtime_status == RPM_SUSPENDING) > + return true; > +#endif > + > + return false; > +} > + > static int pirq_enable_irq(struct pci_dev *dev) > { > u8 pin = 0; > @@ -1258,7 +1270,8 @@ static int pirq_enable_irq(struct pci_dev *dev) > > static void pirq_disable_irq(struct pci_dev *dev) > { > - if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) { > + if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) && > + dev->irq_managed && dev->irq) { > mp_unmap_irq(dev->irq); > pci_reset_managed_irq(dev); > } > diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c > index 6da0f9beab19..d8a3f49a960c 100644 > --- a/drivers/acpi/pci_irq.c > +++ b/drivers/acpi/pci_irq.c > @@ -479,6 +479,14 @@ void acpi_pci_irq_disable(struct pci_dev *dev) > if (!pin || !pci_has_managed_irq(dev)) > return; > > + /* Keep IOAPIC pin configuration when suspending */ > + if (dev->dev.power.is_prepared) > + return; > +#ifdef CONFIG_PM > + if (dev->dev.power.runtime_status == RPM_SUSPENDING) > + return; > +#endif > + > entry = acpi_pci_irq_lookup(dev, pin); > if (!entry) > return; >
>From ef3eb76f0e700954bc865e86f9107f34e214216e Mon Sep 17 00:00:00 2001 From: Liu Jiang <jiang....@linux.intel.com> Date: Wed, 30 Sep 2015 00:57:33 +0800 Subject: [PATCH] Gather debug info --- drivers/gpu/drm/radeon/radeon_device.c | 2 ++ drivers/pci/msi.c | 3 +++ drivers/pci/pci-driver.c | 8 ++++++++ 3 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index d8319da..425515f 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1627,10 +1627,12 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) radeon_agp_suspend(rdev); + dev_warn(&dev->pdev->dev, "irqdomain: before save pci state: msi%d irq%d\n", dev->pdev->msi_enabled, dev->pdev->irq); pci_save_state(dev->pdev); if (suspend) { /* Shut down the device */ pci_disable_device(dev->pdev); + dev_warn(&dev->pdev->dev, "irqdomain: before save pci state: msi%d irq%d\n", dev->pdev->msi_enabled, dev->pdev->irq); pci_set_power_state(dev->pdev, PCI_D3hot); } diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index d449714..c789bf1 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -405,6 +405,7 @@ static void __pci_restore_msi_state(struct pci_dev *dev) return; entry = irq_get_msi_desc(dev->irq); + dev_warn(&dev->dev, "irqdomain: restore msi config irq%d, entry%p\n", dev->irq, entry); pci_intx_for_msi(dev, 0); pci_msi_set_enable(dev, 0); @@ -602,6 +603,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec) int ret; unsigned mask; + dev_warn(&dev->dev, "irqdomain: enable msi\n"); pci_msi_set_enable(dev, 0); /* Disable MSI during set up */ entry = msi_setup_entry(dev, nvec); @@ -872,6 +874,7 @@ void pci_msi_shutdown(struct pci_dev *dev) if (!pci_msi_enable || !dev || !dev->msi_enabled) return; + dev_warn(&dev->dev, "irqdomain: shutdown msi\n"); BUG_ON(list_empty(dev_to_msi_list(&dev->dev))); desc = first_pci_msi_entry(dev); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index dd652f2..15352c1 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -864,6 +864,7 @@ static int pci_pm_freeze(struct device *dev) struct pci_dev *pci_dev = to_pci_dev(dev); const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + dev_warn(dev, "irqdomain: freeze msi %d irq%d\n", pci_dev->msi_enabled, pci_dev->irq); if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_FREEZE); @@ -901,6 +902,7 @@ static int pci_pm_freeze_noirq(struct device *dev) struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; + dev_warn(dev, "irqdomain: freeze_noirq msi %d irq%d\n", pci_dev->msi_enabled, pci_dev->irq); if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_FREEZE); @@ -930,6 +932,7 @@ static int pci_pm_thaw_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + dev_warn(dev, "irqdomain: thaw_noirq msi %d irq%d\n", pci_dev->msi_enabled, pci_dev->irq); if (pcibios_pm_ops.thaw_noirq) { error = pcibios_pm_ops.thaw_noirq(dev); if (error) @@ -953,6 +956,7 @@ static int pci_pm_thaw(struct device *dev) const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; + dev_warn(dev, "irqdomain: thaw msi %d irq%d\n", pci_dev->msi_enabled, pci_dev->irq); if (pcibios_pm_ops.thaw) { error = pcibios_pm_ops.thaw(dev); if (error) @@ -979,6 +983,7 @@ static int pci_pm_poweroff(struct device *dev) struct pci_dev *pci_dev = to_pci_dev(dev); const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + dev_warn(dev, "irqdomain: poweroff msi %d irq%d\n", pci_dev->msi_enabled, pci_dev->irq); if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); @@ -1014,6 +1019,7 @@ static int pci_pm_poweroff_noirq(struct device *dev) struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; + dev_warn(dev, "irqdomain: poweroff_noirq msi %d irq%d\n", pci_dev->msi_enabled, pci_dev->irq); if (pci_has_legacy_pm_support(to_pci_dev(dev))) return pci_legacy_suspend_late(dev, PMSG_HIBERNATE); @@ -1055,6 +1061,7 @@ static int pci_pm_restore_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + dev_warn(dev, "irqdomain: restore_noirq msi %d irq%d\n", pci_dev->msi_enabled, pci_dev->irq); if (pcibios_pm_ops.restore_noirq) { error = pcibios_pm_ops.restore_noirq(dev); if (error) @@ -1078,6 +1085,7 @@ static int pci_pm_restore(struct device *dev) const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; + dev_warn(dev, "irqdomain: restore msi %d irq%d\n", pci_dev->msi_enabled, pci_dev->irq); if (pcibios_pm_ops.restore) { error = pcibios_pm_ops.restore(dev); if (error) -- 1.9.1