>From 21d8f844bf1971b609e9b07086fddfc76e1d1ec7 Mon Sep 17 00:00:00 2001
From: Timothy Pearson <tpear...@raptorengineering.com>
Date: Tue, 17 Jun 2025 23:16:37 -0500
Subject: [PATCH 4/8] powerpc/eeh: Make EEH driver device hotplug safe

Multiple race conditions existed between the PCIe hotplug driver and
the EEH driver, leading to a variety of kernel oopses of the same
general nature:

<pcie device unplug>
<eeh driver trigger>
<hotplug removal trigger>
<pcie tree reconfiguration>
<eeh recovery next step>
<oops in EEH driver bus iteration loop>

A second class of oops is also seen when the underling bus disappears
during device recovery.

Refactor the EEH module to be PCI rescan and remove safe.  Also clean
up a few minor formatting / readability issues.

Signed-off-by: Timothy Pearson <tpear...@raptorengineering.com>
---
 arch/powerpc/kernel/eeh_driver.c             | 48 +++++++++++++-------
 arch/powerpc/kernel/eeh_pe.c                 | 10 ++--
 arch/powerpc/platforms/powernv/eeh-powernv.c |  1 +
 3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 7efe04c68f0f..dd50de91c438 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -257,13 +257,12 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, 
eeh_report_fn fn,
        struct pci_driver *driver;
        enum pci_ers_result new_result;
 
-       pci_lock_rescan_remove();
        pdev = edev->pdev;
        if (pdev)
                get_device(&pdev->dev);
-       pci_unlock_rescan_remove();
        if (!pdev) {
                eeh_edev_info(edev, "no device");
+               *result = PCI_ERS_RESULT_DISCONNECT;
                return;
        }
        device_lock(&pdev->dev);
@@ -304,8 +303,9 @@ static void eeh_pe_report(const char *name, struct eeh_pe 
*root,
        struct eeh_dev *edev, *tmp;
 
        pr_info("EEH: Beginning: '%s'\n", name);
-       eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp)
-               eeh_pe_report_edev(edev, fn, result);
+       eeh_for_each_pe(root, pe)
+               eeh_pe_for_each_dev(pe, edev, tmp)
+                       eeh_pe_report_edev(edev, fn, result);
        if (result)
                pr_info("EEH: Finished:'%s' with aggregate recovery 
state:'%s'\n",
                        name, pci_ers_result_name(*result));
@@ -383,6 +383,8 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, 
void *userdata)
        if (!edev)
                return;
 
+       pci_lock_rescan_remove();
+
        /*
         * The content in the config space isn't saved because
         * the blocked config space on some adapters. We have
@@ -393,14 +395,19 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, 
void *userdata)
                if (list_is_last(&edev->entry, &edev->pe->edevs))
                        eeh_pe_restore_bars(edev->pe);
 
+               pci_unlock_rescan_remove();
                return;
        }
 
        pdev = eeh_dev_to_pci_dev(edev);
-       if (!pdev)
+       if (!pdev) {
+               pci_unlock_rescan_remove();
                return;
+       }
 
        pci_restore_state(pdev);
+
+       pci_unlock_rescan_remove();
 }
 
 /**
@@ -647,9 +654,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
        if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
                eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
        } else {
-               pci_lock_rescan_remove();
                pci_hp_remove_devices(bus);
-               pci_unlock_rescan_remove();
        }
 
        /*
@@ -665,8 +670,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
        if (rc)
                return rc;
 
-       pci_lock_rescan_remove();
-
        /* Restore PE */
        eeh_ops->configure_bridge(pe);
        eeh_pe_restore_bars(pe);
@@ -674,7 +677,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
        /* Clear frozen state */
        rc = eeh_clear_pe_frozen_state(pe, false);
        if (rc) {
-               pci_unlock_rescan_remove();
                return rc;
        }
 
@@ -709,7 +711,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
        pe->tstamp = tstamp;
        pe->freeze_count = cnt;
 
-       pci_unlock_rescan_remove();
        return 0;
 }
 
@@ -843,10 +844,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
                {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
        int devices = 0;
 
+       pci_lock_rescan_remove();
+
        bus = eeh_pe_bus_get(pe);
        if (!bus) {
                pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
                        __func__, pe->phb->global_number, pe->addr);
+               pci_unlock_rescan_remove();
                return;
        }
 
@@ -1094,10 +1098,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
                eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
                eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
 
-               pci_lock_rescan_remove();
-               pci_hp_remove_devices(bus);
-               pci_unlock_rescan_remove();
+               bus = eeh_pe_bus_get(pe);
+               if (bus)
+                       pci_hp_remove_devices(bus);
+               else
+                       pr_err("%s: PCI bus for PHB#%x-PE#%x disappeared\n",
+                               __func__, pe->phb->global_number, pe->addr);
+
                /* The passed PE should no longer be used */
+               pci_unlock_rescan_remove();
                return;
        }
 
@@ -1114,6 +1123,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
                        eeh_clear_slot_attention(edev->pdev);
 
        eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
+
+       pci_unlock_rescan_remove();
 }
 
 /**
@@ -1132,6 +1143,7 @@ void eeh_handle_special_event(void)
        unsigned long flags;
        int rc;
 
+       pci_lock_rescan_remove();
 
        do {
                rc = eeh_ops->next_error(&pe);
@@ -1171,10 +1183,12 @@ void eeh_handle_special_event(void)
 
                        break;
                case EEH_NEXT_ERR_NONE:
+                       pci_unlock_rescan_remove();
                        return;
                default:
                        pr_warn("%s: Invalid value %d from next_error()\n",
                                __func__, rc);
+                       pci_unlock_rescan_remove();
                        return;
                }
 
@@ -1186,7 +1200,9 @@ void eeh_handle_special_event(void)
                if (rc == EEH_NEXT_ERR_FROZEN_PE ||
                    rc == EEH_NEXT_ERR_FENCED_PHB) {
                        eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+                       pci_unlock_rescan_remove();
                        eeh_handle_normal_event(pe);
+                       pci_lock_rescan_remove();
                } else {
                        eeh_for_each_pe(pe, tmp_pe)
                                eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
@@ -1199,7 +1215,6 @@ void eeh_handle_special_event(void)
                                eeh_report_failure, NULL);
                        eeh_set_channel_state(pe, pci_channel_io_perm_failure);
 
-                       pci_lock_rescan_remove();
                        list_for_each_entry(hose, &hose_list, list_node) {
                                phb_pe = eeh_phb_pe_get(hose);
                                if (!phb_pe ||
@@ -1218,7 +1233,6 @@ void eeh_handle_special_event(void)
                                }
                                pci_hp_remove_devices(bus);
                        }
-                       pci_unlock_rescan_remove();
                }
 
                /*
@@ -1228,4 +1242,6 @@ void eeh_handle_special_event(void)
                if (rc == EEH_NEXT_ERR_DEAD_IOC)
                        break;
        } while (rc != EEH_NEXT_ERR_NONE);
+
+       pci_unlock_rescan_remove();
 }
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index d283d281d28e..e740101fadf3 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -671,10 +671,12 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
        eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val);
 
        /* Check link */
-       if (!edev->pdev->link_active_reporting) {
-               eeh_edev_dbg(edev, "No link reporting capability\n");
-               msleep(1000);
-               return;
+       if (edev->pdev) {
+               if (!edev->pdev->link_active_reporting) {
+                       eeh_edev_dbg(edev, "No link reporting capability\n");
+                       msleep(1000);
+                       return;
+               }
        }
 
        /* Wait the link is up until timeout (5s) */
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index db3370d1673c..5694fc8ee60f 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1117,6 +1117,7 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
        if (pci_is_root_bus(bus->parent))
                return pnv_eeh_root_reset(hose, option);
        return pnv_eeh_bridge_reset(bus->self, option);
+
 }
 
 /**
-- 
2.39.5

Reply via email to