Re: [PATCH 02/14] powerpc/eeh: Fix race when freeing PDNs

2019-09-16 Thread Sam Bobroff
On Tue, Sep 03, 2019 at 08:15:53PM +1000, Oliver O'Halloran wrote:
> When hot-adding devices we rely on the hotplug driver to create pci_dn's
> for the devices under the hotplug slot. Converse, when hot-removing the
> driver will remove the pci_dn's that it created. This is a problem because
> the pci_dev is still live until it's refcount drops to zero. This can
> happen if the driver is slow to tear down it's internal state. Ideally, the
> driver would not attempt to perform any config accesses to the device once
> it's been marked as removed, but sometimes it happens. As a result, we
> might attempt to access the pci_dn for a device that has been torn down and
> the kernel may crash as a result.
> 
> To fix this, don't free the pci_dn unless the corresponding pci_dev has
> been released.  If the pci_dev is still live, then we mark the pci_dn with
> a flag that indicates the pci_dev's release function should free it.
> 
> Signed-off-by: Oliver O'Halloran 

If I understand this, it works because when
pci_remove_device_node_info() calls pci_get_domain_bus_and_slot(),
it either:
a) returns null, meaning the pci_dev is already gone, the release
handler is already called, and the PDN can be removed there, or
b) returns non-null and atomically increases the refcount and the
release handler won't be called until after we've set the DEAD flag and
released our reference.

Looks good to me,

Reviewed-by: Sam Bobroff 

> ---
>  arch/powerpc/include/asm/pci-bridge.h |  1 +
>  arch/powerpc/kernel/pci-hotplug.c |  7 +++
>  arch/powerpc/kernel/pci_dn.c  | 21 +++--
>  3 files changed, 27 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/pci-bridge.h 
> b/arch/powerpc/include/asm/pci-bridge.h
> index 54a9de01c2a3..69f4cb3b7c56 100644
> --- a/arch/powerpc/include/asm/pci-bridge.h
> +++ b/arch/powerpc/include/asm/pci-bridge.h
> @@ -183,6 +183,7 @@ struct iommu_table;
>  struct pci_dn {
>   int flags;
>  #define PCI_DN_FLAG_IOV_VF   0x01
> +#define PCI_DN_FLAG_DEAD 0x02/* Device has been hot-removed */
>  
>   int busno;  /* pci bus number */
>   int devfn;  /* pci device and function number */
> diff --git a/arch/powerpc/kernel/pci-hotplug.c 
> b/arch/powerpc/kernel/pci-hotplug.c
> index 0b0cf8168b47..fc62c4bc47b1 100644
> --- a/arch/powerpc/kernel/pci-hotplug.c
> +++ b/arch/powerpc/kernel/pci-hotplug.c
> @@ -55,11 +55,18 @@ EXPORT_SYMBOL_GPL(pci_find_bus_by_node);
>  void pcibios_release_device(struct pci_dev *dev)
>  {
>   struct pci_controller *phb = pci_bus_to_host(dev->bus);
> + struct pci_dn *pdn = pci_get_pdn(dev);
>  
>   eeh_remove_device(dev);
>  
>   if (phb->controller_ops.release_device)
>   phb->controller_ops.release_device(dev);
> +
> + /* free()ing the pci_dn has been deferred to us, do it now */
> + if (pdn && (pdn->flags & PCI_DN_FLAG_DEAD)) {
> + pci_dbg(dev, "freeing dead pdn\n");
> + kfree(pdn);
> + }
>  }
>  
>  /**
> diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
> index 5614ca0940ca..4e654df55969 100644
> --- a/arch/powerpc/kernel/pci_dn.c
> +++ b/arch/powerpc/kernel/pci_dn.c
> @@ -320,6 +320,7 @@ void pci_remove_device_node_info(struct device_node *dn)
>  {
>   struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
>   struct device_node *parent;
> + struct pci_dev *pdev;
>  #ifdef CONFIG_EEH
>   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
>  
> @@ -333,12 +334,28 @@ void pci_remove_device_node_info(struct device_node *dn)
>   WARN_ON(!list_empty(>child_list));
>   list_del(>list);
>  
> + /* Drop the parent pci_dn's ref to our backing dt node */
>   parent = of_get_parent(dn);
>   if (parent)
>   of_node_put(parent);
>  
> - dn->data = NULL;
> - kfree(pdn);
> + /*
> +  * At this point we *might* still have a pci_dev that was
> +  * instantiated from this pci_dn. So defer free()ing it until
> +  * the pci_dev's release function is called.
> +  */
> + pdev = pci_get_domain_bus_and_slot(pdn->phb->global_number,
> + pdn->busno, pdn->devfn);
> + if (pdev) {
> + /* NB: pdev has a ref to dn */
> + pci_dbg(pdev, "marked pdn (from %pOF) as dead\n", dn);
> + pdn->flags |= PCI_DN_FLAG_DEAD;
> + } else {
> + dn->data = NULL;
> + kfree(pdn);
> + }
> +
> + pci_dev_put(pdev);
>  }
>  EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
>  
> -- 
> 2.21.0
> 


signature.asc
Description: PGP signature


[PATCH 02/14] powerpc/eeh: Fix race when freeing PDNs

2019-09-03 Thread Oliver O'Halloran
When hot-adding devices we rely on the hotplug driver to create pci_dn's
for the devices under the hotplug slot. Converse, when hot-removing the
driver will remove the pci_dn's that it created. This is a problem because
the pci_dev is still live until it's refcount drops to zero. This can
happen if the driver is slow to tear down it's internal state. Ideally, the
driver would not attempt to perform any config accesses to the device once
it's been marked as removed, but sometimes it happens. As a result, we
might attempt to access the pci_dn for a device that has been torn down and
the kernel may crash as a result.

To fix this, don't free the pci_dn unless the corresponding pci_dev has
been released.  If the pci_dev is still live, then we mark the pci_dn with
a flag that indicates the pci_dev's release function should free it.

Signed-off-by: Oliver O'Halloran 
---
 arch/powerpc/include/asm/pci-bridge.h |  1 +
 arch/powerpc/kernel/pci-hotplug.c |  7 +++
 arch/powerpc/kernel/pci_dn.c  | 21 +++--
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 54a9de01c2a3..69f4cb3b7c56 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -183,6 +183,7 @@ struct iommu_table;
 struct pci_dn {
int flags;
 #define PCI_DN_FLAG_IOV_VF 0x01
+#define PCI_DN_FLAG_DEAD   0x02/* Device has been hot-removed */
 
int busno;  /* pci bus number */
int devfn;  /* pci device and function number */
diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index 0b0cf8168b47..fc62c4bc47b1 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -55,11 +55,18 @@ EXPORT_SYMBOL_GPL(pci_find_bus_by_node);
 void pcibios_release_device(struct pci_dev *dev)
 {
struct pci_controller *phb = pci_bus_to_host(dev->bus);
+   struct pci_dn *pdn = pci_get_pdn(dev);
 
eeh_remove_device(dev);
 
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
+
+   /* free()ing the pci_dn has been deferred to us, do it now */
+   if (pdn && (pdn->flags & PCI_DN_FLAG_DEAD)) {
+   pci_dbg(dev, "freeing dead pdn\n");
+   kfree(pdn);
+   }
 }
 
 /**
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 5614ca0940ca..4e654df55969 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -320,6 +320,7 @@ void pci_remove_device_node_info(struct device_node *dn)
 {
struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
struct device_node *parent;
+   struct pci_dev *pdev;
 #ifdef CONFIG_EEH
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 
@@ -333,12 +334,28 @@ void pci_remove_device_node_info(struct device_node *dn)
WARN_ON(!list_empty(>child_list));
list_del(>list);
 
+   /* Drop the parent pci_dn's ref to our backing dt node */
parent = of_get_parent(dn);
if (parent)
of_node_put(parent);
 
-   dn->data = NULL;
-   kfree(pdn);
+   /*
+* At this point we *might* still have a pci_dev that was
+* instantiated from this pci_dn. So defer free()ing it until
+* the pci_dev's release function is called.
+*/
+   pdev = pci_get_domain_bus_and_slot(pdn->phb->global_number,
+   pdn->busno, pdn->devfn);
+   if (pdev) {
+   /* NB: pdev has a ref to dn */
+   pci_dbg(pdev, "marked pdn (from %pOF) as dead\n", dn);
+   pdn->flags |= PCI_DN_FLAG_DEAD;
+   } else {
+   dn->data = NULL;
+   kfree(pdn);
+   }
+
+   pci_dev_put(pdev);
 }
 EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
 
-- 
2.21.0