In the current design, a numa-node is made online only if
that node is attached to cpu/memory. With this design, if
any PCI/IO device is found to be attached to a numa-node
which is not online then the numa-node id of the corresponding
PCI/IO device is set to NUMA_NO_NODE(-1). This design may
negatively impact the performance of a PCIe device if the
numa-node assigned to that device is -1, because in such a
case we may not be able to accurately calculate the distance
between two nodes.
A multi-controller NVMe PCIe disk has an issue with
calculating the node distance if the PCIe NVMe controller
is attached to a PCI host bridge which has its numa-node id
set to NUMA_NO_NODE. This patch helps fix this by ensuring
that a cpu/memory-less numa node is made online if it is
attached to a PCI host bridge.

Signed-off-by: Nilay Shroff <ni...@linux.ibm.com>
---
 arch/powerpc/mm/numa.c                     | 14 +++++++++++++-
 arch/powerpc/platforms/pseries/pci_dlpar.c | 14 ++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a490724e84ad..9e5e366cee43 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -896,7 +896,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
 
 static int __init parse_numa_properties(void)
 {
-       struct device_node *memory;
+       struct device_node *memory, *pci;
        int default_nid = 0;
        unsigned long i;
        const __be32 *associativity;
@@ -1010,6 +1010,18 @@ static int __init parse_numa_properties(void)
                        goto new_range;
        }
 
+       for_each_node_by_name(pci, "pci") {
+               int nid;
+
+               associativity = of_get_associativity(pci);
+               if (associativity) {
+                       nid = associativity_to_nid(associativity);
+                       initialize_form1_numa_distance(associativity);
+               }
+               if (likely(nid >= 0) && !node_online(nid))
+                       node_set_online(nid);
+       }
+
        /*
         * Now do the same thing for each MEMBLOCK listed in the
         * ibm,dynamic-memory property in the
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 4448386268d9..52e2623a741d 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -11,6 +11,7 @@
 
 #include <linux/pci.h>
 #include <linux/export.h>
+#include <linux/node.h>
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
 #include <asm/firmware.h>
@@ -21,9 +22,22 @@
 struct pci_controller *init_phb_dynamic(struct device_node *dn)
 {
        struct pci_controller *phb;
+       int nid;
 
        pr_debug("PCI: Initializing new hotplug PHB %pOF\n", dn);
 
+       nid = of_node_to_nid(dn);
+       if (likely((nid) >= 0)) {
+               if (!node_online(nid)) {
+                       if (__register_one_node(nid)) {
+                               pr_err("PCI: Failed to register node %d\n", nid);
+                       } else {
+                               update_numa_distance(dn);
+                               node_set_online(nid);
+                       }
+               }
+       }
+
        phb = pcibios_alloc_controller(dn);
        if (!phb)
                return NULL;
-- 
2.44.0

Reply via email to