On Mon, Nov 02, 2015 at 10:40:36AM +1100, Alexey Kardashevskiy wrote:
>On 11/01/2015 12:53 PM, Wei Yang wrote:
>>On Fri, Oct 30, 2015 at 04:20:48PM +1100, Alexey Kardashevskiy wrote:
>>>On 10/26/2015 02:16 PM, Wei Yang wrote:
>>>>Different from PCI bus dependent PE, PE for VFs doesn't have the
>>>
>>>s/Different from/Unlike/
>>>
>>
>>Will change in next version.
>>
>>>
>>>>primary bus, on which the PCI hotplug is implemented. The patch
>>>>supports error recovery, especially the PCI hotplug for VF's PE.
>>>
>>>The patch adds support for error recovery of what exactly?
>>>What is "especially" about?
>>>
>>
>>PFs are enumerated on PCI bus, while VFs are created by PF's driver.
>>
>>In EEH recovery, it has two cases.
>>1. Device and driver is EEH aware, error handlers are called.
>>2. Device and driver is not EEH aware, un-plug the device and plug it again by
>>    enumerating it.
>>
>>The special thing happens on the second case. For a PF, we could use the
>>original pci core to enumerate the bus, while for VF, we need to record the VF
>>which are un-plugged then plug it again.
>
>
>Right. This should have been the actual commit log.
>
>
>>>
>>>>The hotplug on VF's PE is implemented based on VFs, instead of
>>>>PCI bus any more.
>>>
>>>Needs rephrase.
>>>
>>>Is this patch about EEH error recovery, i.e. unplug VF, re-plug VF? Why does
>>>the commit log talk about PE hotplug? I thought we do VF (i.e. PCI device)
>>>hotplug, not PE.
>>>
>>
>>Hmm... unlike the Bus PE for PFs, VF PE is dynamically created and released
>>when VFs are created and released.
>
>
>Sure. PEs are created/released, not plugged/unplugged (VFs are), that was my
>point.
>

Thanks for the suggestion, will change it in next version.

>
>>
>>>
>>>>
>>>>[gwshan: changelog and code refactoring]
>>>>Signed-off-by: Wei Yang <weiy...@linux.vnet.ibm.com>
>>>>Acked-by: Gavin Shan <gws...@linux.vnet.ibm.com>
>>>>---
>>>>  arch/powerpc/include/asm/eeh.h   |   1 +
>>>>  arch/powerpc/kernel/eeh.c        |   8 ++++
>>>>  arch/powerpc/kernel/eeh_driver.c | 100 
>>>> +++++++++++++++++++++++++++++++--------
>>>>  arch/powerpc/kernel/eeh_pe.c     |   3 +-
>>>>  4 files changed, 90 insertions(+), 22 deletions(-)
>>>>
>>>>diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
>>>>index 331c856..ea1f13c4 100644
>>>>--- a/arch/powerpc/include/asm/eeh.h
>>>>+++ b/arch/powerpc/include/asm/eeh.h
>>>>@@ -142,6 +142,7 @@ struct eeh_dev {
>>>>    struct pci_controller *phb;     /* Associated PHB               */
>>>>    struct pci_dn *pdn;             /* Associated PCI device node   */
>>>>    struct pci_dev *pdev;           /* Associated PCI device        */
>>>>+   int    in_error;                /* Error flag for eeh_dev       */
>>>
>>>Make it "bool".
>>>
>>
>>Will change it in next version.
>>
>>>
>>>>    struct pci_dev *physfn;         /* Associated PF PORT           */
>>>>    struct pci_bus *bus;            /* PCI bus for partial hotplug  */
>>>>  };
>>>>diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>>>>index af9b597..28e4d73 100644
>>>>--- a/arch/powerpc/kernel/eeh.c
>>>>+++ b/arch/powerpc/kernel/eeh.c
>>>>@@ -1227,6 +1227,14 @@ void eeh_remove_device(struct pci_dev *dev)
>>>>     * from the parent PE during the BAR resotre.
>>>>     */
>>>>    edev->pdev = NULL;
>>>>+
>>>>+   /*
>>>>+    * The flag "in_error" is used to trace EEH devices for VFs
>>>>+    * in error state or not. It's set in eeh_report_error(). If
>>>>+    * it's not set, eeh_report_{reset,resume}() won't be called
>>>>+    * for the VF EEH device.
>>>>+    */
>>>>+   edev->in_error = 0;
>>>>    dev->dev.archdata.edev = NULL;
>>>>    if (!(edev->pe->state & EEH_PE_KEEP))
>>>>            eeh_rmv_from_parent_pe(edev);
>>>>diff --git a/arch/powerpc/kernel/eeh_driver.c 
>>>>b/arch/powerpc/kernel/eeh_driver.c
>>>>index 89eb4bc..99868e2 100644
>>>>--- a/arch/powerpc/kernel/eeh_driver.c
>>>>+++ b/arch/powerpc/kernel/eeh_driver.c
>>>>@@ -211,6 +211,7 @@ static void *eeh_report_error(void *data, void 
>>>>*userdata)
>>>>    if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>>>>    if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>>>>
>>>>+   edev->in_error = 1;
>>>>    eeh_pcid_put(dev);
>>>>    return NULL;
>>>>  }
>>>>@@ -282,7 +283,8 @@ static void *eeh_report_reset(void *data, void 
>>>>*userdata)
>>>>
>>>>    if (!driver->err_handler ||
>>>>        !driver->err_handler->slot_reset ||
>>>>-       (edev->mode & EEH_DEV_NO_HANDLER)) {
>>>>+       (edev->mode & EEH_DEV_NO_HANDLER) ||
>>>>+       (!edev->in_error)) {
>>>>            eeh_pcid_put(dev);
>>>>            return NULL;
>>>>    }
>>>>@@ -339,14 +341,16 @@ static void *eeh_report_resume(void *data, void 
>>>>*userdata)
>>>>
>>>
>>>bood was_in_error = edev->in_error;
>>>edev->in_error = false;
>>>
>>>then use was_in_error below and there is no need to replace return with goto,
>>>etc -> slightly simpler code.
>>>
>>
>>Will change it in next version.
>>
>>>
>>>>    if (!driver->err_handler ||
>>>>        !driver->err_handler->resume ||
>>>>-       (edev->mode & EEH_DEV_NO_HANDLER)) {
>>>>+       (edev->mode & EEH_DEV_NO_HANDLER) ||
>>>>+       (!edev->in_error)) {
>>>>            edev->mode &= ~EEH_DEV_NO_HANDLER;
>>>>-           eeh_pcid_put(dev);
>>>>-           return NULL;
>>>>+           goto out;
>>>>    }
>>>>
>>>>    driver->err_handler->resume(dev);
>>>>
>>>>+out:
>>>>+   edev->in_error = 0;
>>>>    eeh_pcid_put(dev);
>>>>    return NULL;
>>>>  }
>>>>@@ -386,12 +390,38 @@ static void *eeh_report_failure(void *data, void 
>>>>*userdata)
>>>>    return NULL;
>>>>  }
>>>>
>>>>+static void *eeh_add_virt_device(void *data, void *userdata)
>>>>+{
>>>>+   struct pci_driver *driver;
>>>>+   struct eeh_dev *edev = (struct eeh_dev *)data;
>>>>+   struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>>>+   struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>>>>+
>>>>+   if (!(edev->physfn)) {
>>>>+           pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
>>>>+                   __func__, edev->phb->global_number, pdn->busno,
>>>>+                   PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
>>>>+           return NULL;
>>>>+   }
>>>>+
>>>>+   driver = eeh_pcid_get(dev);
>>>>+   if (driver) {
>>>>+           eeh_pcid_put(dev);
>>>>+           if (driver->err_handler)
>>>>+                   return NULL;
>>>>+   }
>>>>+
>>>>+   pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0);
>>>>+   return NULL;
>>>>+}
>>>>+
>>>>  static void *eeh_rmv_device(void *data, void *userdata)
>>>>  {
>>>>    struct pci_driver *driver;
>>>>    struct eeh_dev *edev = (struct eeh_dev *)data;
>>>>    struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>>>    int *removed = (int *)userdata;
>>>>+   struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>>>>
>>>>    /*
>>>>     * Actually, we should remove the PCI bridges as well.
>>>>@@ -416,7 +446,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
>>>>    driver = eeh_pcid_get(dev);
>>>>    if (driver) {
>>>>            eeh_pcid_put(dev);
>>>>-           if (driver->err_handler)
>>>>+           if (removed && driver->err_handler)
>>>>                    return NULL;
>>>>    }
>>>>
>>>>@@ -425,11 +455,23 @@ static void *eeh_rmv_device(void *data, void 
>>>>*userdata)
>>>>             pci_name(dev));
>>>>    edev->bus = dev->bus;
>>>>    edev->mode |= EEH_DEV_DISCONNECTED;
>>>>-   (*removed)++;
>>>>+   if (removed)
>>>>+           (*removed)++;
>>>>
>>>>-   pci_lock_rescan_remove();
>>>>-   pci_stop_and_remove_bus_device(dev);
>>>>-   pci_unlock_rescan_remove();
>>>>+   if (edev->physfn) {
>>>>+           pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0);
>>>>+           edev->pdev = NULL;
>>>>+
>>>>+           /*
>>>>+            * We have to set the VF PE number to invalid one, which is
>>>>+            * required to plug the VF successfully.
>>>>+            */
>>>>+           pdn->pe_number = IODA_INVALID_PE;
>>>>+   } else {
>>>>+           pci_lock_rescan_remove();
>>>>+           pci_stop_and_remove_bus_device(dev);
>>>>+           pci_unlock_rescan_remove();
>>>>+   }
>>>>
>>>>    return NULL;
>>>>  }
>>>>@@ -548,6 +590,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
>>>>pci_bus *bus)
>>>>    struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
>>>>    struct timeval tstamp;
>>>>    int cnt, rc, removed = 0;
>>>>+   struct eeh_dev *edev;
>>>>
>>>>    /* pcibios will clear the counter; save the value */
>>>>    cnt = pe->freeze_count;
>>>>@@ -561,12 +604,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
>>>>pci_bus *bus)
>>>>     */
>>>>    eeh_pe_state_mark(pe, EEH_PE_KEEP);
>>>>    if (bus) {
>>>>-           pci_lock_rescan_remove();
>>>>-           pcibios_remove_pci_devices(bus);
>>>>-           pci_unlock_rescan_remove();
>>>>-   } else if (frozen_bus) {
>>>>+           if (pe->type & EEH_PE_VF)
>>>>+                   eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
>>>
>>>
>>>I believe the rule is that if one branch of "if" uses curly braces, then the
>>>other should have them too.
>>>
>>
>>Thanks for reminding, will fix it in next version.
>
>
>I thought checkpatch.pl checks for it but apparently it does not.
>
>
>
>>>
>>>>+           else {
>>>>+                   pci_lock_rescan_remove();
>>>>+                   pcibios_remove_pci_devices(bus);
>>>>+                   pci_unlock_rescan_remove();
>>>>+           }
>>>>+   } else if (frozen_bus)
>>>>            eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
>>>>-   }
>>>>
>>>>    /*
>>>>     * Reset the pci controller. (Asserts RST#; resets config space).
>>>>@@ -607,14 +653,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
>>>>pci_bus *bus)
>>>>             * PE. We should disconnect it so the binding can be
>>>>             * rebuilt when adding PCI devices.
>>>>             */
>>>>+           edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>>>>            eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
>>>>-           pcibios_add_pci_devices(bus);
>>>>+           if (pe->type & EEH_PE_VF)
>>>
>>>Move "edev = list_first_entry(&pe->edevs, struct eeh_dev, list)" here. You
>>>could actually do:
>>>
>>>eeh_add_virt_device(list_first_entry(&pe->edevs, struct eeh_dev, list), 
>>>NULL);
>>>
>>>and drop local variable @edev. Or move it to this scope. Dunno.
>>>
>>
>>Hmm... as I know, in eeh_pe_detach_dev() will remove the edev from pe's edevs
>>list.
>>
>>>
>>>>+                   eeh_add_virt_device(edev, NULL);
>>>>+           else
>>>>+                   pcibios_add_pci_devices(bus);
>>>>    } else if (frozen_bus && removed) {
>>>>            pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
>>>>            ssleep(5);
>>>>
>>>>+           edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>>>>            eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
>>>>-           pcibios_add_pci_devices(frozen_bus);
>>>>+           if (pe->type & EEH_PE_VF)
>>>
>>>
>>>The same comment as above.
>>>
>>>>+                   eeh_add_virt_device(edev, NULL);
>>>>+           else
>>>>+                   pcibios_add_pci_devices(frozen_bus);
>>>>    }
>>>>    eeh_pe_state_clear(pe, EEH_PE_KEEP);
>>>>
>>>>@@ -792,11 +846,15 @@ perm_error:
>>>>     * the their PCI config any more.
>>>>     */
>>>>    if (frozen_bus) {
>>>>-           eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>>>-
>>>>-           pci_lock_rescan_remove();
>>>>-           pcibios_remove_pci_devices(frozen_bus);
>>>>-           pci_unlock_rescan_remove();
>>>>+           if (pe->type & EEH_PE_VF) {
>>>>+                   eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
>>>>+                   eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>>>+           } else {
>>>>+                   eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>>>+                   pci_lock_rescan_remove();
>>>>+                   pcibios_remove_pci_devices(frozen_bus);
>>>>+                   pci_unlock_rescan_remove();
>>>>+           }
>>>>    }
>>>>  }
>>>>
>>>>diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
>>>>index 260a701..5cde950 100644
>>>>--- a/arch/powerpc/kernel/eeh_pe.c
>>>>+++ b/arch/powerpc/kernel/eeh_pe.c
>>>>@@ -914,7 +914,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
>>>>    if (pe->type & EEH_PE_PHB) {
>>>>            bus = pe->phb->bus;
>>>>    } else if (pe->type & EEH_PE_BUS ||
>>>>-              pe->type & EEH_PE_DEVICE) {
>>>>+              pe->type & EEH_PE_DEVICE ||
>>>>+              pe->type & EEH_PE_VF) {
>>>>            if (pe->bus) {
>>>>                    bus = pe->bus;
>>>>                    goto out;
>>>>
>
>
>-- 
>Alexey
>--
>To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>the body of a message to majord...@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Richard Yang
Help you, Help me

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to