>From 21d8f844bf1971b609e9b07086fddfc76e1d1ec7 Mon Sep 17 00:00:00 2001 From: Timothy Pearson <tpear...@raptorengineering.com> Date: Tue, 17 Jun 2025 23:16:37 -0500 Subject: [PATCH 4/8] powerpc/eeh: Make EEH driver device hotplug safe
Multiple race conditions existed between the PCIe hotplug driver and the EEH driver, leading to a variety of kernel oopses of the same general nature: <pcie device unplug> <eeh driver trigger> <hotplug removal trigger> <pcie tree reconfiguration> <eeh recovery next step> <oops in EEH driver bus iteration loop> A second class of oops is also seen when the underling bus disappears during device recovery. Refactor the EEH module to be PCI rescan and remove safe. Also clean up a few minor formatting / readability issues. Signed-off-by: Timothy Pearson <tpear...@raptorengineering.com> --- arch/powerpc/kernel/eeh_driver.c | 48 +++++++++++++------- arch/powerpc/kernel/eeh_pe.c | 10 ++-- arch/powerpc/platforms/powernv/eeh-powernv.c | 1 + 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7efe04c68f0f..dd50de91c438 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -257,13 +257,12 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn, struct pci_driver *driver; enum pci_ers_result new_result; - pci_lock_rescan_remove(); pdev = edev->pdev; if (pdev) get_device(&pdev->dev); - pci_unlock_rescan_remove(); if (!pdev) { eeh_edev_info(edev, "no device"); + *result = PCI_ERS_RESULT_DISCONNECT; return; } device_lock(&pdev->dev); @@ -304,8 +303,9 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root, struct eeh_dev *edev, *tmp; pr_info("EEH: Beginning: '%s'\n", name); - eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp) - eeh_pe_report_edev(edev, fn, result); + eeh_for_each_pe(root, pe) + eeh_pe_for_each_dev(pe, edev, tmp) + eeh_pe_report_edev(edev, fn, result); if (result) pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n", name, pci_ers_result_name(*result)); @@ -383,6 +383,8 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) if (!edev) return; + pci_lock_rescan_remove(); + /* * The content in the config space isn't saved because * the blocked config space on some adapters. We have @@ -393,14 +395,19 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) if (list_is_last(&edev->entry, &edev->pe->edevs)) eeh_pe_restore_bars(edev->pe); + pci_unlock_rescan_remove(); return; } pdev = eeh_dev_to_pci_dev(edev); - if (!pdev) + if (!pdev) { + pci_unlock_rescan_remove(); return; + } pci_restore_state(pdev); + + pci_unlock_rescan_remove(); } /** @@ -647,9 +654,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) { eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } else { - pci_lock_rescan_remove(); pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); } /* @@ -665,8 +670,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (rc) return rc; - pci_lock_rescan_remove(); - /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); @@ -674,7 +677,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); if (rc) { - pci_unlock_rescan_remove(); return rc; } @@ -709,7 +711,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, pe->tstamp = tstamp; pe->freeze_count = cnt; - pci_unlock_rescan_remove(); return 0; } @@ -843,10 +844,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe) {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0}; int devices = 0; + pci_lock_rescan_remove(); + bus = eeh_pe_bus_get(pe); if (!bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); + pci_unlock_rescan_remove(); return; } @@ -1094,10 +1098,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); + bus = eeh_pe_bus_get(pe); + if (bus) + pci_hp_remove_devices(bus); + else + pr_err("%s: PCI bus for PHB#%x-PE#%x disappeared\n", + __func__, pe->phb->global_number, pe->addr); + /* The passed PE should no longer be used */ + pci_unlock_rescan_remove(); return; } @@ -1114,6 +1123,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_clear_slot_attention(edev->pdev); eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); + + pci_unlock_rescan_remove(); } /** @@ -1132,6 +1143,7 @@ void eeh_handle_special_event(void) unsigned long flags; int rc; + pci_lock_rescan_remove(); do { rc = eeh_ops->next_error(&pe); @@ -1171,10 +1183,12 @@ void eeh_handle_special_event(void) break; case EEH_NEXT_ERR_NONE: + pci_unlock_rescan_remove(); return; default: pr_warn("%s: Invalid value %d from next_error()\n", __func__, rc); + pci_unlock_rescan_remove(); return; } @@ -1186,7 +1200,9 @@ void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + pci_unlock_rescan_remove(); eeh_handle_normal_event(pe); + pci_lock_rescan_remove(); } else { eeh_for_each_pe(pe, tmp_pe) eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) @@ -1199,7 +1215,6 @@ void eeh_handle_special_event(void) eeh_report_failure, NULL); eeh_set_channel_state(pe, pci_channel_io_perm_failure); - pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); if (!phb_pe || @@ -1218,7 +1233,6 @@ void eeh_handle_special_event(void) } pci_hp_remove_devices(bus); } - pci_unlock_rescan_remove(); } /* @@ -1228,4 +1242,6 @@ void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_DEAD_IOC) break; } while (rc != EEH_NEXT_ERR_NONE); + + pci_unlock_rescan_remove(); } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d283d281d28e..e740101fadf3 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -671,10 +671,12 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val); /* Check link */ - if (!edev->pdev->link_active_reporting) { - eeh_edev_dbg(edev, "No link reporting capability\n"); - msleep(1000); - return; + if (edev->pdev) { + if (!edev->pdev->link_active_reporting) { + eeh_edev_dbg(edev, "No link reporting capability\n"); + msleep(1000); + return; + } } /* Wait the link is up until timeout (5s) */ diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index db3370d1673c..5694fc8ee60f 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1117,6 +1117,7 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) if (pci_is_root_bus(bus->parent)) return pnv_eeh_root_reset(hose, option); return pnv_eeh_bridge_reset(bus->self, option); + } /** -- 2.39.5