powerpc/vphn: Reorganize source code to better distinguish the VPHN
code from the NUMA code, by moving the relevant functions to
appropriate files.

Signed-off-by: Michael Bringmann <m...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/topology.h    |    6 
 arch/powerpc/mm/numa.c                 |  550 +------------------------------
 arch/powerpc/mm/vphn.c                 |  574 ++++++++++++++++++++++++++++++++
 arch/powerpc/platforms/pseries/dlpar.c |    2 
 4 files changed, 583 insertions(+), 549 deletions(-)

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 85d6428..600e1c6 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -106,12 +106,6 @@ static inline int prrn_is_enabled(void)
 #endif /* CONFIG_PPC_SPLPAR */
 #endif /* CONFIG_HOTPLUG_CPU || CONFIG_NEED_MULTIPLE_NODES */
 
-#if defined(CONFIG_PPC_SPLPAR)
-extern void shared_topology_update(void);
-#else
-#define        shared_topology_update()        0
-#endif /* CONFIG_PPC_SPLPAR */
-
 #include <asm-generic/topology.h>
 
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 2e8258a..a66f0da 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -42,8 +42,12 @@
 #include <asm/setup.h>
 #include <asm/vdso.h>
 
+#include "vphn.h"
+
 static int numa_enabled = 1;
 
+bool topology_updates_enabled = true;
+
 static char *cmdline __initdata;
 
 static int numa_debug;
@@ -61,8 +65,7 @@
 static int n_mem_addr_cells, n_mem_size_cells;
 static int form1_affinity;
 
-#define MAX_DISTANCE_REF_POINTS 4
-static int distance_ref_points_depth;
+int distance_ref_points_depth;
 static const __be32 *distance_ref_points;
 static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
 
@@ -143,12 +146,12 @@ static void reset_numa_cpu_lookup_table(void)
                numa_cpu_lookup_table[cpu] = -1;
 }
 
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+void update_numa_cpu_lookup_table(unsigned int cpu, int node)
 {
        numa_cpu_lookup_table[cpu] = node;
 }
 
-static void map_cpu_to_node(int cpu, int node)
+void map_cpu_to_node(int cpu, int node)
 {
        update_numa_cpu_lookup_table(cpu, node);
 
@@ -159,7 +162,7 @@ static void map_cpu_to_node(int cpu, int node)
 }
 
 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
-static void unmap_cpu_from_node(unsigned long cpu)
+void unmap_cpu_from_node(unsigned long cpu)
 {
        int node = numa_cpu_lookup_table[cpu];
 
@@ -234,7 +237,7 @@ static void initialize_distance_lookup_table(int nid,
 /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
  * info is found.
  */
-static int associativity_to_nid(const __be32 *associativity)
+int associativity_to_nid(const __be32 *associativity)
 {
        int nid = -1;
 
@@ -1001,8 +1004,6 @@ static int __init early_numa(char *p)
 }
 early_param("numa", early_numa);
 
-static bool topology_updates_enabled = true;
-
 static int __init early_topology_updates(char *p)
 {
        if (!p)
@@ -1179,536 +1180,3 @@ u64 memory_hotplug_max(void)
         return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
-
-/* Virtual Processor Home Node (VPHN) support */
-#ifdef CONFIG_PPC_SPLPAR
-
-#include "vphn.h"
-
-struct topology_update_data {
-       struct topology_update_data *next;
-       unsigned int cpu;
-       int old_nid;
-       int new_nid;
-};
-
-#define        TOPOLOGY_DEF_TIMER_SECS         60
-
-static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
-static cpumask_t cpu_associativity_changes_mask;
-static int vphn_enabled;
-static int prrn_enabled;
-static void reset_topology_timer(void);
-static int topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
-static int topology_inited;
-static int topology_update_needed;
-
-/*
- * Change polling interval for associativity changes.
- */
-int timed_topology_update(int nsecs)
-{
-       if (nsecs > 0)
-               topology_timer_secs = nsecs;
-       else
-               topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
-
-       if (vphn_enabled)
-               reset_topology_timer();
-
-       return 0;
-}
-
-/*
- * Store the current values of the associativity change counters in the
- * hypervisor.
- */
-static void setup_cpu_associativity_change_counters(void)
-{
-       int cpu;
-
-       /* The VPHN feature supports a maximum of 8 reference points */
-       BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
-
-       for_each_possible_cpu(cpu) {
-               int i;
-               u8 *counts = vphn_cpu_change_counts[cpu];
-               volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
-
-               for (i = 0; i < distance_ref_points_depth; i++)
-                       counts[i] = hypervisor_counts[i];
-       }
-}
-
-/*
- * The hypervisor maintains a set of 8 associativity change counters in
- * the VPA of each cpu that correspond to the associativity levels in the
- * ibm,associativity-reference-points property. When an associativity
- * level changes, the corresponding counter is incremented.
- *
- * Set a bit in cpu_associativity_changes_mask for each cpu whose home
- * node associativity levels have changed.
- *
- * Returns the number of cpus with unhandled associativity changes.
- */
-static int update_cpu_associativity_changes_mask(void)
-{
-       int cpu;
-       cpumask_t *changes = &cpu_associativity_changes_mask;
-
-       for_each_possible_cpu(cpu) {
-               int i, changed = 0;
-               u8 *counts = vphn_cpu_change_counts[cpu];
-               volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
-
-               for (i = 0; i < distance_ref_points_depth; i++) {
-                       if (hypervisor_counts[i] != counts[i]) {
-                               counts[i] = hypervisor_counts[i];
-                               changed = 1;
-                       }
-               }
-               if (changed) {
-                       cpumask_or(changes, changes, cpu_sibling_mask(cpu));
-                       cpu = cpu_last_thread_sibling(cpu);
-               }
-       }
-
-       return cpumask_weight(changes);
-}
-
-/*
- * Retrieve the new associativity information for a virtual processor's
- * home node.
- */
-static long hcall_vphn(unsigned long cpu, __be32 *associativity)
-{
-       long rc;
-       long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
-       u64 flags = 1;
-       int hwcpu = get_hard_smp_processor_id(cpu);
-
-       rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
-       vphn_unpack_associativity(retbuf, associativity);
-
-       return rc;
-}
-
-static long vphn_get_associativity(unsigned long cpu,
-                                       __be32 *associativity)
-{
-       long rc;
-
-       rc = hcall_vphn(cpu, associativity);
-
-       switch (rc) {
-       case H_FUNCTION:
-               printk(KERN_INFO
-                       "VPHN is not supported. Disabling polling...\n");
-               stop_topology_update();
-               break;
-       case H_HARDWARE:
-               printk(KERN_ERR
-                       "hcall_vphn() experienced a hardware fault "
-                       "preventing VPHN. Disabling polling...\n");
-               stop_topology_update();
-               break;
-       case H_SUCCESS:
-               printk(KERN_INFO
-                       "VPHN hcall succeeded. Reset polling...\n");
-               timed_topology_update(0);
-               break;
-       }
-
-       return rc;
-}
-
-/*
- * Update the CPU maps and sysfs entries for a single CPU when its NUMA
- * characteristics change. This function doesn't perform any locking and is
- * only safe to call from stop_machine().
- */
-static int update_cpu_topology(void *data)
-{
-       struct topology_update_data *update;
-       unsigned long cpu;
-
-       if (!data)
-               return -EINVAL;
-
-       cpu = smp_processor_id();
-
-       for (update = data; update; update = update->next) {
-               int new_nid = update->new_nid;
-               if (cpu != update->cpu)
-                       continue;
-
-               unmap_cpu_from_node(cpu);
-               map_cpu_to_node(cpu, new_nid);
-               set_cpu_numa_node(cpu, new_nid);
-               set_cpu_numa_mem(cpu, local_memory_node(new_nid));
-               vdso_getcpu_init();
-       }
-
-       return 0;
-}
-
-static int update_lookup_table(void *data)
-{
-       struct topology_update_data *update;
-
-       if (!data)
-               return -EINVAL;
-
-       /*
-        * Upon topology update, the numa-cpu lookup table needs to be updated
-        * for all threads in the core, including offline CPUs, to ensure that
-        * future hotplug operations respect the cpu-to-node associativity
-        * properly.
-        */
-       for (update = data; update; update = update->next) {
-               int nid, base, j;
-
-               nid = update->new_nid;
-               base = cpu_first_thread_sibling(update->cpu);
-
-               for (j = 0; j < threads_per_core; j++) {
-                       update_numa_cpu_lookup_table(base + j, nid);
-               }
-       }
-
-       return 0;
-}
-
-/*
- * Update the node maps and sysfs entries for each cpu whose home node
- * has changed. Returns 1 when the topology has changed, and 0 otherwise.
- *
- * cpus_locked says whether we already hold cpu_hotplug_lock.
- */
-int numa_update_cpu_topology(bool cpus_locked)
-{
-       unsigned int cpu, sibling, changed = 0;
-       struct topology_update_data *updates, *ud;
-       __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
-       cpumask_t updated_cpus;
-       struct device *dev;
-       int weight, new_nid, i = 0;
-
-       if (!prrn_enabled && !vphn_enabled) {
-               if (!topology_inited)
-                       topology_update_needed = 1;
-               return 0;
-       }
-
-       weight = cpumask_weight(&cpu_associativity_changes_mask);
-       if (!weight)
-               return 0;
-
-       updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL);
-       if (!updates)
-               return 0;
-
-       cpumask_clear(&updated_cpus);
-
-       for_each_cpu(cpu, &cpu_associativity_changes_mask) {
-               /*
-                * If siblings aren't flagged for changes, updates list
-                * will be too short. Skip on this update and set for next
-                * update.
-                */
-               if (!cpumask_subset(cpu_sibling_mask(cpu),
-                                       &cpu_associativity_changes_mask)) {
-                       pr_info("Sibling bits not set for associativity "
-                                       "change, cpu%d\n", cpu);
-                       cpumask_or(&cpu_associativity_changes_mask,
-                                       &cpu_associativity_changes_mask,
-                                       cpu_sibling_mask(cpu));
-                       cpu = cpu_last_thread_sibling(cpu);
-                       continue;
-               }
-
-               /* Use associativity from first thread for all siblings */
-               vphn_get_associativity(cpu, associativity);
-               new_nid = associativity_to_nid(associativity);
-               if (new_nid < 0 || !node_online(new_nid))
-                       new_nid = first_online_node;
-
-               if (new_nid == numa_cpu_lookup_table[cpu]) {
-                       cpumask_andnot(&cpu_associativity_changes_mask,
-                                       &cpu_associativity_changes_mask,
-                                       cpu_sibling_mask(cpu));
-                       pr_info("Assoc chg gives same node %d for cpu%d\n",
-                                       new_nid, cpu);
-                       cpu = cpu_last_thread_sibling(cpu);
-                       continue;
-               }
-
-               for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
-                       ud = &updates[i++];
-                       ud->cpu = sibling;
-                       ud->new_nid = new_nid;
-                       ud->old_nid = numa_cpu_lookup_table[sibling];
-                       cpumask_set_cpu(sibling, &updated_cpus);
-                       if (i < weight)
-                               ud->next = &updates[i];
-                       else
-                               ud->next = NULL;
-               }
-               cpu = cpu_last_thread_sibling(cpu);
-       }
-
-       pr_debug("Topology update for the following CPUs:\n");
-       if (cpumask_weight(&updated_cpus)) {
-               for (ud = &updates[0]; ud; ud = ud->next) {
-                       pr_debug("cpu %d moving from node %d "
-                                         "to %d\n", ud->cpu,
-                                         ud->old_nid, ud->new_nid);
-               }
-       }
-
-       /*
-        * In cases where we have nothing to update (because the updates list
-        * is too short or because the new topology is same as the old one),
-        * skip invoking update_cpu_topology() via stop-machine(). This is
-        * necessary (and not just a fast-path optimization) since stop-machine
-        * can end up electing a random CPU to run update_cpu_topology(), and
-        * thus trick us into setting up incorrect cpu-node mappings (since
-        * 'updates' is kzalloc()'ed).
-        *
-        * And for the similar reason, we will skip all the following updating.
-        */
-       if (!cpumask_weight(&updated_cpus))
-               goto out;
-
-       if (cpus_locked)
-               stop_machine_cpuslocked(update_cpu_topology, &updates[0],
-                                       &updated_cpus);
-       else
-               stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
-
-       /*
-        * Update the numa-cpu lookup table with the new mappings, even for
-        * offline CPUs. It is best to perform this update from the stop-
-        * machine context.
-        */
-       if (cpus_locked)
-               stop_machine_cpuslocked(update_lookup_table, &updates[0],
-                                       cpumask_of(raw_smp_processor_id()));
-       else
-               stop_machine(update_lookup_table, &updates[0],
-                            cpumask_of(raw_smp_processor_id()));
-
-       for (ud = &updates[0]; ud; ud = ud->next) {
-               unregister_cpu_under_node(ud->cpu, ud->old_nid);
-               register_cpu_under_node(ud->cpu, ud->new_nid);
-
-               dev = get_cpu_device(ud->cpu);
-               if (dev)
-                       kobject_uevent(&dev->kobj, KOBJ_CHANGE);
-               cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
-               changed = 1;
-       }
-
-out:
-       kfree(updates);
-       topology_update_needed = 0;
-       return changed;
-}
-
-int arch_update_cpu_topology(void)
-{
-       lockdep_assert_cpus_held();
-       return numa_update_cpu_topology(true);
-}
-
-static void topology_work_fn(struct work_struct *work)
-{
-       rebuild_sched_domains();
-}
-static DECLARE_WORK(topology_work, topology_work_fn);
-
-static void topology_schedule_update(void)
-{
-       schedule_work(&topology_work);
-}
-
-void shared_topology_update(void)
-{
-       if (firmware_has_feature(FW_FEATURE_VPHN) &&
-                  lppaca_shared_proc(get_lppaca()))
-               topology_schedule_update();
-}
-EXPORT_SYMBOL(shared_topology_update);
-
-static void topology_timer_fn(unsigned long ignored)
-{
-       if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
-               topology_schedule_update();
-       else if (vphn_enabled) {
-               if (update_cpu_associativity_changes_mask() > 0)
-                       topology_schedule_update();
-               reset_topology_timer();
-       }
-}
-static struct timer_list topology_timer =
-       TIMER_INITIALIZER(topology_timer_fn, 0, 0);
-
-static void reset_topology_timer(void)
-{
-       topology_timer.data = 0;
-       topology_timer.expires = jiffies + topology_timer_secs * HZ;
-       mod_timer(&topology_timer, topology_timer.expires);
-}
-
-#ifdef CONFIG_SMP
-
-static void stage_topology_update(int core_id)
-{
-       cpumask_or(&cpu_associativity_changes_mask,
-               &cpu_associativity_changes_mask, cpu_sibling_mask(core_id));
-       reset_topology_timer();
-}
-
-static int dt_update_callback(struct notifier_block *nb,
-                               unsigned long action, void *data)
-{
-       struct of_reconfig_data *update = data;
-       int rc = NOTIFY_DONE;
-
-       switch (action) {
-       case OF_RECONFIG_UPDATE_PROPERTY:
-               if (!of_prop_cmp(update->dn->type, "cpu") &&
-                   !of_prop_cmp(update->prop->name, "ibm,associativity")) {
-                       u32 core_id;
-                       of_property_read_u32(update->dn, "reg", &core_id);
-                       stage_topology_update(core_id);
-                       rc = NOTIFY_OK;
-               }
-               break;
-       }
-
-       return rc;
-}
-
-static struct notifier_block dt_update_nb = {
-       .notifier_call = dt_update_callback,
-};
-
-#endif
-
-/*
- * Start polling for associativity changes.
- */
-int start_topology_update(void)
-{
-       int rc = 0;
-
-       if (firmware_has_feature(FW_FEATURE_PRRN)) {
-               if (!prrn_enabled) {
-                       prrn_enabled = 1;
-#ifdef CONFIG_SMP
-                       rc = of_reconfig_notifier_register(&dt_update_nb);
-#endif
-               }
-       }
-       if (firmware_has_feature(FW_FEATURE_VPHN) &&
-                  lppaca_shared_proc(get_lppaca())) {
-               if (!vphn_enabled) {
-                       vphn_enabled = 1;
-                       setup_cpu_associativity_change_counters();
-                       init_timer_deferrable(&topology_timer);
-                       reset_topology_timer();
-               }
-       }
-
-       return rc;
-}
-
-/*
- * Disable polling for VPHN associativity changes.
- */
-int stop_topology_update(void)
-{
-       int rc = 0;
-
-       if (prrn_enabled) {
-               prrn_enabled = 0;
-#ifdef CONFIG_SMP
-               rc = of_reconfig_notifier_unregister(&dt_update_nb);
-#endif
-       }
-       if (vphn_enabled) {
-               vphn_enabled = 0;
-               rc = del_timer_sync(&topology_timer);
-       }
-
-       return rc;
-}
-
-int prrn_is_enabled(void)
-{
-       return prrn_enabled;
-}
-
-static int topology_read(struct seq_file *file, void *v)
-{
-       if (vphn_enabled || prrn_enabled)
-               seq_puts(file, "on\n");
-       else
-               seq_puts(file, "off\n");
-
-       return 0;
-}
-
-static int topology_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, topology_read, NULL);
-}
-
-static ssize_t topology_write(struct file *file, const char __user *buf,
-                             size_t count, loff_t *off)
-{
-       char kbuf[4]; /* "on" or "off" plus null. */
-       int read_len;
-
-       read_len = count < 3 ? count : 3;
-       if (copy_from_user(kbuf, buf, read_len))
-               return -EINVAL;
-
-       kbuf[read_len] = '\0';
-
-       if (!strncmp(kbuf, "on", 2))
-               start_topology_update();
-       else if (!strncmp(kbuf, "off", 3))
-               stop_topology_update();
-       else
-               return -EINVAL;
-
-       return count;
-}
-
-static const struct file_operations topology_ops = {
-       .read = seq_read,
-       .write = topology_write,
-       .open = topology_open,
-       .release = single_release
-};
-
-static int topology_update_init(void)
-{
-       /* Do not poll for changes if disabled at boot */
-       if (topology_updates_enabled)
-               start_topology_update();
-
-       if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
-               return -ENOMEM;
-
-       topology_inited = 1;
-       if (topology_update_needed)
-               bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
-                                       nr_cpumask_bits);
-
-       return 0;
-}
-device_initcall(topology_update_init);
-#endif /* CONFIG_PPC_SPLPAR */
diff --git a/arch/powerpc/mm/vphn.c b/arch/powerpc/mm/vphn.c
index 5f8ef50..3859c9c 100644
--- a/arch/powerpc/mm/vphn.c
+++ b/arch/powerpc/mm/vphn.c
@@ -1,6 +1,70 @@
+/*
+ * pSeries VPHN support
+ *
+ * Copyright (C) 2015 Greg Kurz <gk...@linux.vnet.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define pr_fmt(fmt) "vphn: " fmt
+
+#include <linux/threads.h>
+#include <linux/bootmem.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/export.h>
+#include <linux/nodemask.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/pfn.h>
+#include <linux/cpuset.h>
+#include <linux/node.h>
+#include <linux/stop_machine.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <asm/cputhreads.h>
+#include <asm/sparsemem.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/cputhreads.h>
+#include <asm/topology.h>
+#include <asm/firmware.h>
+#include <asm/paca.h>
+#include <asm/hvcall.h>
+#include <asm/setup.h>
+#include <asm/vdso.h>
 #include <asm/byteorder.h>
+
+/* Virtual Processor Home Node (VPHN) support */
+
 #include "vphn.h"
 
+struct topology_update_data {
+       struct topology_update_data *next;
+       unsigned int cpu;
+       int old_nid;
+       int new_nid;
+};
+
+#define        TOPOLOGY_DEF_TIMER_SECS         60
+
+static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
+static cpumask_t cpu_associativity_changes_mask;
+static int vphn_enabled;
+static int prrn_enabled;
+static void reset_topology_timer(void);
+static int topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
+static int topology_inited;
+static int topology_update_needed;
+
 /*
  * The associativity domain numbers are returned from the hypervisor as a
  * stream of mixed 16-bit and 32-bit fields. The stream is terminated by the
@@ -68,3 +132,513 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
 
        return nr_assoc_doms;
 }
+
+/*
+ * Change polling interval for associativity changes.
+ */
+int timed_topology_update(int nsecs)
+{
+       if (nsecs > 0)
+               topology_timer_secs = nsecs;
+       else
+               topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
+
+       if (vphn_enabled)
+               reset_topology_timer();
+
+       return 0;
+}
+
+/*
+ * Store the current values of the associativity change counters in the
+ * hypervisor.
+ */
+static void setup_cpu_associativity_change_counters(void)
+{
+       int cpu;
+
+       /* The VPHN feature supports a maximum of 8 reference points */
+       BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
+
+       for_each_possible_cpu(cpu) {
+               int i;
+               u8 *counts = vphn_cpu_change_counts[cpu];
+               volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+
+               for (i = 0; i < distance_ref_points_depth; i++)
+                       counts[i] = hypervisor_counts[i];
+       }
+}
+
+/*
+ * The hypervisor maintains a set of 8 associativity change counters in
+ * the VPA of each cpu that correspond to the associativity levels in the
+ * ibm,associativity-reference-points property. When an associativity
+ * level changes, the corresponding counter is incremented.
+ *
+ * Set a bit in cpu_associativity_changes_mask for each cpu whose home
+ * node associativity levels have changed.
+ *
+ * Returns the number of cpus with unhandled associativity changes.
+ */
+static int update_cpu_associativity_changes_mask(void)
+{
+       int cpu;
+       cpumask_t *changes = &cpu_associativity_changes_mask;
+
+       for_each_possible_cpu(cpu) {
+               int i, changed = 0;
+               u8 *counts = vphn_cpu_change_counts[cpu];
+               volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+
+               for (i = 0; i < distance_ref_points_depth; i++) {
+                       if (hypervisor_counts[i] != counts[i]) {
+                               counts[i] = hypervisor_counts[i];
+                               changed = 1;
+                       }
+               }
+               if (changed) {
+                       cpumask_or(changes, changes, cpu_sibling_mask(cpu));
+                       cpu = cpu_last_thread_sibling(cpu);
+               }
+       }
+
+       return cpumask_weight(changes);
+}
+
+/*
+ * Retrieve the new associativity information for a virtual processor's
+ * home node.
+ */
+static long hcall_vphn(unsigned long cpu, __be32 *associativity)
+{
+       long rc;
+       long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+       u64 flags = 1;
+       int hwcpu = get_hard_smp_processor_id(cpu);
+
+       rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
+       vphn_unpack_associativity(retbuf, associativity);
+
+       return rc;
+}
+
+static long vphn_get_associativity(unsigned long cpu,
+                                       __be32 *associativity)
+{
+       long rc;
+
+       rc = hcall_vphn(cpu, associativity);
+
+       switch (rc) {
+       case H_FUNCTION:
+               printk(KERN_INFO
+                       "VPHN is not supported. Disabling polling...\n");
+               stop_topology_update();
+               break;
+       case H_HARDWARE:
+               printk(KERN_ERR
+                       "hcall_vphn() experienced a hardware fault "
+                       "preventing VPHN. Disabling polling...\n");
+               stop_topology_update();
+               break;
+       case H_SUCCESS:
+               printk(KERN_INFO
+                       "VPHN hcall succeeded. Reset polling...\n");
+               timed_topology_update(0);
+               break;
+       }
+
+       return rc;
+}
+
+/*
+ * Update the CPU maps and sysfs entries for a single CPU when its NUMA
+ * characteristics change. This function doesn't perform any locking and is
+ * only safe to call from stop_machine().
+ */
+static int update_cpu_topology(void *data)
+{
+       struct topology_update_data *update;
+       unsigned long cpu;
+
+       if (!data)
+               return -EINVAL;
+
+       cpu = smp_processor_id();
+
+       for (update = data; update; update = update->next) {
+               int new_nid = update->new_nid;
+               if (cpu != update->cpu)
+                       continue;
+
+               unmap_cpu_from_node(cpu);
+               map_cpu_to_node(cpu, new_nid);
+               set_cpu_numa_node(cpu, new_nid);
+               set_cpu_numa_mem(cpu, local_memory_node(new_nid));
+               vdso_getcpu_init();
+       }
+
+       return 0;
+}
+
+static int update_lookup_table(void *data)
+{
+       struct topology_update_data *update;
+
+       if (!data)
+               return -EINVAL;
+
+       /*
+        * Upon topology update, the numa-cpu lookup table needs to be updated
+        * for all threads in the core, including offline CPUs, to ensure that
+        * future hotplug operations respect the cpu-to-node associativity
+        * properly.
+        */
+       for (update = data; update; update = update->next) {
+               int nid, base, j;
+
+               nid = update->new_nid;
+               base = cpu_first_thread_sibling(update->cpu);
+
+               for (j = 0; j < threads_per_core; j++) {
+                       update_numa_cpu_lookup_table(base + j, nid);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Update the node maps and sysfs entries for each cpu whose home node
+ * has changed. Returns 1 when the topology has changed, and 0 otherwise.
+ *
+ * cpus_locked says whether we already hold cpu_hotplug_lock.
+ */
+int numa_update_cpu_topology(bool cpus_locked)
+{
+       unsigned int cpu, sibling, changed = 0;
+       struct topology_update_data *updates, *ud;
+       __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+       cpumask_t updated_cpus;
+       struct device *dev;
+       int weight, new_nid, i = 0;
+
+       if (!prrn_enabled && !vphn_enabled) {
+               if (!topology_inited)
+                       topology_update_needed = 1;
+               return 0;
+       }
+
+       weight = cpumask_weight(&cpu_associativity_changes_mask);
+       if (!weight)
+               return 0;
+
+       updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL);
+       if (!updates)
+               return 0;
+
+       cpumask_clear(&updated_cpus);
+
+       for_each_cpu(cpu, &cpu_associativity_changes_mask) {
+               /*
+                * If siblings aren't flagged for changes, updates list
+                * will be too short. Skip on this update and set for next
+                * update.
+                */
+               if (!cpumask_subset(cpu_sibling_mask(cpu),
+                                       &cpu_associativity_changes_mask)) {
+                       pr_info("Sibling bits not set for associativity "
+                                       "change, cpu%d\n", cpu);
+                       cpumask_or(&cpu_associativity_changes_mask,
+                                       &cpu_associativity_changes_mask,
+                                       cpu_sibling_mask(cpu));
+                       cpu = cpu_last_thread_sibling(cpu);
+                       continue;
+               }
+
+               /* Use associativity from first thread for all siblings */
+               vphn_get_associativity(cpu, associativity);
+               new_nid = associativity_to_nid(associativity);
+               if (new_nid < 0 || !node_online(new_nid))
+                       new_nid = first_online_node;
+
+               if (new_nid == numa_cpu_lookup_table[cpu]) {
+                       cpumask_andnot(&cpu_associativity_changes_mask,
+                                       &cpu_associativity_changes_mask,
+                                       cpu_sibling_mask(cpu));
+                       pr_info("Assoc chg gives same node %d for cpu%d\n",
+                                       new_nid, cpu);
+                       cpu = cpu_last_thread_sibling(cpu);
+                       continue;
+               }
+
+               for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+                       ud = &updates[i++];
+                       ud->cpu = sibling;
+                       ud->new_nid = new_nid;
+                       ud->old_nid = numa_cpu_lookup_table[sibling];
+                       cpumask_set_cpu(sibling, &updated_cpus);
+                       if (i < weight)
+                               ud->next = &updates[i];
+               }
+               cpu = cpu_last_thread_sibling(cpu);
+       }
+
+       if (i)
+               updates[i-1].next = NULL;
+
+       pr_debug("Topology update for the following CPUs:\n");
+       if (cpumask_weight(&updated_cpus)) {
+               for (ud = &updates[0]; ud; ud = ud->next) {
+                       pr_debug("cpu %d moving from node %d "
+                                         "to %d\n", ud->cpu,
+                                         ud->old_nid, ud->new_nid);
+               }
+       }
+
+       /*
+        * In cases where we have nothing to update (because the updates list
+        * is too short or because the new topology is same as the old one),
+        * skip invoking update_cpu_topology() via stop-machine(). This is
+        * necessary (and not just a fast-path optimization) since stop-machine
+        * can end up electing a random CPU to run update_cpu_topology(), and
+        * thus trick us into setting up incorrect cpu-node mappings (since
+        * 'updates' is kzalloc()'ed).
+        *
+        * And for the similar reason, we will skip all the following updating.
+        */
+       if (!cpumask_weight(&updated_cpus))
+               goto out;
+
+       if (cpus_locked)
+               stop_machine_cpuslocked(update_cpu_topology, &updates[0],
+                                       &updated_cpus);
+       else
+               stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
+
+       /*
+        * Update the numa-cpu lookup table with the new mappings, even for
+        * offline CPUs. It is best to perform this update from the stop-
+        * machine context.
+        */
+       if (cpus_locked)
+               stop_machine_cpuslocked(update_lookup_table, &updates[0],
+                                       cpumask_of(raw_smp_processor_id()));
+       else
+               stop_machine(update_lookup_table, &updates[0],
+                            cpumask_of(raw_smp_processor_id()));
+
+       for (ud = &updates[0]; ud; ud = ud->next) {
+               unregister_cpu_under_node(ud->cpu, ud->old_nid);
+               register_cpu_under_node(ud->cpu, ud->new_nid);
+
+               dev = get_cpu_device(ud->cpu);
+               if (dev)
+                       kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+               cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
+               changed = 1;
+       }
+
+out:
+       kfree(updates);
+       topology_update_needed = 0;
+       return changed;
+}
+
+int arch_update_cpu_topology(void)
+{
+       lockdep_assert_cpus_held();
+       return numa_update_cpu_topology(true);
+}
+
+static void topology_work_fn(struct work_struct *work)
+{
+       rebuild_sched_domains();
+}
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+static void topology_schedule_update(void)
+{
+       schedule_work(&topology_work);
+}
+
+static void shared_topology_update(void)
+{
+       if (firmware_has_feature(FW_FEATURE_VPHN) &&
+                  lppaca_shared_proc(get_lppaca()))
+               topology_schedule_update();
+}
+
+static void topology_timer_fn(unsigned long ignored)
+{
+       if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
+               topology_schedule_update();
+       else if (vphn_enabled) {
+               if (update_cpu_associativity_changes_mask() > 0)
+                       topology_schedule_update();
+               reset_topology_timer();
+       }
+}
+static struct timer_list topology_timer =
+       TIMER_INITIALIZER(topology_timer_fn, 0, 0);
+
+static void reset_topology_timer(void)
+{
+       topology_timer.data = 0;
+       topology_timer.expires = jiffies + topology_timer_secs * HZ;
+       mod_timer(&topology_timer, topology_timer.expires);
+}
+
+#ifdef CONFIG_SMP
+
+static void stage_topology_update(int core_id)
+{
+       cpumask_or(&cpu_associativity_changes_mask,
+               &cpu_associativity_changes_mask, cpu_sibling_mask(core_id));
+       reset_topology_timer();
+}
+
+static int dt_update_callback(struct notifier_block *nb,
+                               unsigned long action, void *data)
+{
+       struct of_reconfig_data *update = data;
+       int rc = NOTIFY_DONE;
+
+       switch (action) {
+       case OF_RECONFIG_UPDATE_PROPERTY:
+               if (!of_prop_cmp(update->dn->type, "cpu") &&
+                   !of_prop_cmp(update->prop->name, "ibm,associativity")) {
+                       u32 core_id;
+                       of_property_read_u32(update->dn, "reg", &core_id);
+                       stage_topology_update(core_id);
+                       rc = NOTIFY_OK;
+               }
+               break;
+       }
+
+       return rc;
+}
+
+static struct notifier_block dt_update_nb = {
+       .notifier_call = dt_update_callback,
+};
+
+#endif
+
+/*
+ * Start polling for associativity changes.
+ */
+int start_topology_update(void)
+{
+       int rc = 0;
+
+       if (firmware_has_feature(FW_FEATURE_PRRN)) {
+               if (!prrn_enabled) {
+                       prrn_enabled = 1;
+#ifdef CONFIG_SMP
+                       rc = of_reconfig_notifier_register(&dt_update_nb);
+#endif
+               }
+       }
+       if (firmware_has_feature(FW_FEATURE_VPHN) &&
+                  lppaca_shared_proc(get_lppaca())) {
+               if (!vphn_enabled) {
+                       vphn_enabled = 1;
+                       setup_cpu_associativity_change_counters();
+                       init_timer_deferrable(&topology_timer);
+                       reset_topology_timer();
+               }
+       }
+
+       return rc;
+}
+
+/*
+ * Disable polling for VPHN associativity changes.
+ */
+int stop_topology_update(void)
+{
+       int rc = 0;
+
+       if (prrn_enabled) {
+               prrn_enabled = 0;
+#ifdef CONFIG_SMP
+               rc = of_reconfig_notifier_unregister(&dt_update_nb);
+#endif
+       }
+       if (vphn_enabled) {
+               vphn_enabled = 0;
+               rc = del_timer_sync(&topology_timer);
+       }
+
+       return rc;
+}
+
+int prrn_is_enabled(void)
+{
+       return prrn_enabled;
+}
+
+static int topology_read(struct seq_file *file, void *v)
+{
+       if (vphn_enabled || prrn_enabled)
+               seq_puts(file, "on\n");
+       else
+               seq_puts(file, "off\n");
+
+       return 0;
+}
+
+static int topology_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, topology_read, NULL);
+}
+
+static ssize_t topology_write(struct file *file, const char __user *buf,
+                             size_t count, loff_t *off)
+{
+       char kbuf[4]; /* "on" or "off" plus null. */
+       int read_len;
+
+       read_len = count < 3 ? count : 3;
+       if (copy_from_user(kbuf, buf, read_len))
+               return -EINVAL;
+
+       kbuf[read_len] = '\0';
+
+       if (!strncmp(kbuf, "on", 2))
+               start_topology_update();
+       else if (!strncmp(kbuf, "off", 3))
+               stop_topology_update();
+       else
+               return -EINVAL;
+
+       return count;
+}
+
+static const struct file_operations topology_ops = {
+       .read = seq_read,
+       .write = topology_write,
+       .open = topology_open,
+       .release = single_release
+};
+
+static int topology_update_init(void)
+{
+       /* Do not poll for changes if disabled at boot */
+       if (topology_updates_enabled)
+               start_topology_update();
+
+       if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
+               return -ENOMEM;
+
+       topology_inited = 1;
+       if (topology_update_needed)
+               bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
+                                       nr_cpumask_bits);
+
+       return 0;
+}
+device_initcall(topology_update_init);
+#endif /* CONFIG_PPC_SPLPAR */
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index ba9a4a0..3918769 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -592,8 +592,6 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
 
 static int __init pseries_dlpar_init(void)
 {
-       shared_topology_update();
-
        pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
                                        WQ_UNBOUND, 1);
        return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);

Reply via email to