On 2020-05-12 00:28, Sam Bobroff wrote:
On Thu, May 07, 2020 at 08:10:37AM -0500, wenxi...@linux.vnet.ibm.com wrote:
From: Wen Xiong <wenxi...@linux.vnet.ibm.com>

Several device drivers hit EEH(Extended Error handling) when triggering
kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
in pci general code. PHB reset stop all PCI transactions from previous
kernel. We have tested the patch in several enviroments:
- direct slot adapters
- adapters under the switch
- a VF adapter in PowerVM
- a VF adapter/adapter in KVM guest.

Signed-off-by: Wen Xiong <wenxi...@linux.vnet.ibm.com>

Hi Wen Xiong,

I saw Oliver's review and I think he's covered the main issues I was
going to raise:
- This will run and produce some spurious errors on powernv. (I think
  distros do compile in both pseries and powernv.)
- There's a bit of code duplication but it's probably OK for this patch.


Hi Oliver and Sam,

Yes. Thanks for reviewing the code. I will keep some code duplication for now until Oliver re-work the EEH code. I am going to fix several minor things in error code patch.

Thanks,
Wendy

I have a few other minor comments, below:

---
arch/powerpc/platforms/pseries/pci.c | 153 +++++++++++++++++++++++++++
 1 file changed, 153 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 911534b89c85..aac7f00696d2 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -11,6 +11,8 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/string.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>

 #include <asm/eeh.h>
 #include <asm/pci-bridge.h>
@@ -354,3 +356,154 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)

        return 0;
 }
+
+/**
+ * pseries_get_pdn_addr - Retrieve PHB address
+ * @pe: EEH PE
+ *
+ * Retrieve the assocated PHB address. Actually, there're 2 RTAS
+ * function calls dedicated for the purpose. We need implement
+ * it through the new function and then the old one. Besides,
+ * you should make sure the config address is figured out from
+ * FDT node before calling the function.
+ *
+ */
+static int pseries_get_pdn_addr(struct pci_controller *phb)
+{
+       int ret = -1;
+       int rets[3];
+       int ibm_get_config_addr_info;
+       int ibm_get_config_addr_info2;
+       int config_addr = 0;
+       struct pci_dn *root_pdn, *pdn;
+
+ ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); + ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
+
+       root_pdn = PCI_DN(phb->dn);
+       pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
+       config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+       if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
+               /*
+                * First of all, we need to make sure there has one PE
+                * associated with the device. Otherwise, PE address is
+                * meaningless.
+                */

This comment might be better if it explained how using option=0
with ibm_get_config_addr tests the PE.

+               ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+                       config_addr, BUID_HI(pdn->phb->buid),
+                       BUID_LO(pdn->phb->buid), 1);
+               if (ret || (rets[0] == 0)) {
+                       pr_warn("%s: Failed to get address for PHB#%x-PE# "
+                               "option=%d config_addr=%x\n",
+                               __func__, pdn->phb->global_number, 1, rets[0]);
+                       return -1;
+               }
+
+               /* Retrieve the associated PE config address */
+               ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+                       config_addr, BUID_HI(pdn->phb->buid),
+                       BUID_LO(pdn->phb->buid), 0);
+               if (ret) {
+                       pr_warn("%s: Failed to get address for PHB#%x-PE# "
+                               "option=%d config_addr=%x\n",
+                               __func__, pdn->phb->global_number, 0, rets[0]);
+                       return -1;
+               }
+               return rets[0];
+       }
+
+       if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+               ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
+                       config_addr, BUID_HI(pdn->phb->buid),
+                       BUID_LO(pdn->phb->buid), 0);
+               if (ret || rets[0]) {
+                       pr_warn("%s: Failed to get address for PHB#%x-PE# "
+                               "config_addr=%x\n",
+                               __func__, pdn->phb->global_number, rets[0]);
+                       return -1;
+               }
+               return rets[0];
+       }
+
+       return ret;
Can this ever return anything other than 0?

+}
+
+static int __init pseries_phb_reset(void)
+{
+       struct pci_controller *phb;
+       int config_addr;
+       int ibm_set_slot_reset;
+       int ibm_configure_pe;
+       int ret;
+
+       if (is_kdump_kernel() || reset_devices) {
+               pr_info("Issue PHB reset ...\n");
+               ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
+               ibm_configure_pe = rtas_token("ibm,configure-pe");
+
+               if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
+                               ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
+                       pr_info("%s: EEH functionality not supported\n",
+                               __func__);
+               }
+
+               list_for_each_entry(phb, &hose_list, list_node) {
+                       config_addr = pseries_get_pdn_addr(phb);
+                       if (config_addr == -1)
+                               continue;
+
+                       ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+                               config_addr, BUID_HI(phb->buid),
+                               BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
+
+                       /* If fundamental-reset not supported, try hot-reset */
+                       if (ret == -8)
+                               ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+                                       config_addr, BUID_HI(phb->buid),
+                                       BUID_LO(phb->buid), EEH_RESET_HOT);
+
+                       if (ret) {
+                               pr_err("%s: fail with rtas_call fundamental 
reset=%d\n",
+                                       __func__, ret);

This error might be a bit confusing, since it's not clear if the result
came from the fundamental or hot-reset.

+                               continue;
+                       }
+               }
+               msleep(EEH_PE_RST_SETTLE_TIME);
+
+               list_for_each_entry(phb, &hose_list, list_node) {
+                       config_addr = pseries_get_pdn_addr(phb);
+                       if (config_addr == -1)
+                               continue;
+
+                       ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+                               config_addr, BUID_HI(phb->buid),
+                               BUID_LO(phb->buid), EEH_RESET_DEACTIVATE);
+                       if (ret) {
+                               pr_err("%s: fail with rtas_call deactive=%d\n",
+                                       __func__, ret);
+                               continue;
+                       }
+               }
+               msleep(EEH_PE_RST_SETTLE_TIME);
+
+               list_for_each_entry(phb, &hose_list, list_node) {
+                       config_addr = pseries_get_pdn_addr(phb);
+                       if (config_addr == -1)
+                               continue;
+
+                       ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+                               config_addr, BUID_HI(phb->buid),
+                               BUID_LO(phb->buid));
+                       if (ret) {
+                               pr_err("%s: fail with rtas_call configure_pe 
=%d\n",
+                                       __func__, ret);

These errors might be more useful if they indicated which PHB caused the
error.

+                               continue;
+                       }
+               }
+       }
+
+       return 0;
+}
+postcore_initcall(pseries_phb_reset);
+
--
2.18.1

Reply via email to