The eeh_sysfs_remove_device() function is supposed to clear the
EEH_DEV_SYSFS flag since it indicates the EEH sysfs entries have been added
for a pci_dev.

When the sysfs files are removed in eeh_remove_device(), the eeh_dev and the
pci_dev have already been de-associated. This then causes the
pci_dev_to_eeh_dev() call in eeh_sysfs_remove_device() to return NULL so
the flag can't be cleared from the still-live eeh_dev. This problem is
worked around in the caller by clearing the flag manually. However, this
behaviour doesn't make a whole lot of sense, so this patch fixes it by:

a) Re-ordering eeh_remove_device() so that eeh_sysfs_remove_device() is
   called before de-associating the pci_dev and eeh_dev.

b) Making eeh_sysfs_remove_device() emit a warning if there's no
   corresponding eeh_dev for a pci_dev. The paths where the sysfs
   files are removed are only reachable if EEH was set up for the
   device in the first place, so hitting this warning indicates a
   programming error.

Signed-off-by: Oliver O'Halloran <ooh...@gmail.com>
---
 arch/powerpc/kernel/eeh.c       | 30 +++++++++++++++++-------------
 arch/powerpc/kernel/eeh_sysfs.c | 15 ++++++++-------
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f192d57..6e24896 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1203,7 +1203,6 @@ void eeh_add_device_late(struct pci_dev *dev)
                eeh_rmv_from_parent_pe(edev);
                eeh_addr_cache_rmv_dev(edev->pdev);
                eeh_sysfs_remove_device(edev->pdev);
-               edev->mode &= ~EEH_DEV_SYSFS;
 
                /*
                 * We definitely should have the PCI device removed
@@ -1306,17 +1305,11 @@ void eeh_remove_device(struct pci_dev *dev)
        edev->pdev = NULL;
 
        /*
-        * The flag "in_error" is used to trace EEH devices for VFs
-        * in error state or not. It's set in eeh_report_error(). If
-        * it's not set, eeh_report_{reset,resume}() won't be called
-        * for the VF EEH device.
+        * eeh_sysfs_remove_device() uses pci_dev_to_eeh_dev() so we need to
+        * remove the sysfs files before clearing dev.archdata.edev
         */
-       edev->in_error = false;
-       dev->dev.archdata.edev = NULL;
-       if (!(edev->pe->state & EEH_PE_KEEP))
-               eeh_rmv_from_parent_pe(edev);
-       else
-               edev->mode |= EEH_DEV_DISCONNECTED;
+       if (edev->mode & EEH_DEV_SYSFS)
+               eeh_sysfs_remove_device(dev);
 
        /*
         * We're removing from the PCI subsystem, that means
@@ -1327,8 +1320,19 @@ void eeh_remove_device(struct pci_dev *dev)
        edev->mode |= EEH_DEV_NO_HANDLER;
 
        eeh_addr_cache_rmv_dev(dev);
-       eeh_sysfs_remove_device(dev);
-       edev->mode &= ~EEH_DEV_SYSFS;
+
+       /*
+        * The flag "in_error" is used to trace EEH devices for VFs
+        * in error state or not. It's set in eeh_report_error(). If
+        * it's not set, eeh_report_{reset,resume}() won't be called
+        * for the VF EEH device.
+        */
+       edev->in_error = false;
+       dev->dev.archdata.edev = NULL;
+       if (!(edev->pe->state & EEH_PE_KEEP))
+               eeh_rmv_from_parent_pe(edev);
+       else
+               edev->mode |= EEH_DEV_DISCONNECTED;
 }
 
 int eeh_unfreeze_pe(struct eeh_pe *pe)
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index c4cc8fc..5614fd83 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -175,22 +175,23 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
 {
        struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 
+       if (!edev) {
+               WARN_ON(eeh_enabled());
+               return;
+       }
+
+       edev->mode &= ~EEH_DEV_SYSFS;
+
        /*
         * The parent directory might have been removed. We needn't
         * continue for that case.
         */
-       if (!pdev->dev.kobj.sd) {
-               if (edev)
-                       edev->mode &= ~EEH_DEV_SYSFS;
+       if (!pdev->dev.kobj.sd)
                return;
-       }
 
        device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
        device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
        device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
 
        eeh_notify_resume_remove(pdev);
-
-       if (edev)
-               edev->mode &= ~EEH_DEV_SYSFS;
 }
-- 
2.9.5

Reply via email to