From: "Gautham R. Shenoy"
On POWER systems, groups of threads within a core sharing the L2-cache
can be indicated by the "ibm,thread-groups" property array with the
identifier "2".
This patch adds support for detecting this and, when present, populating
the cpu_l2_cache_mask of every CPU with the core-siblings that share the
L2-cache with that CPU, as specified by the "ibm,thread-groups" property
array.
On a platform with the following "ibm,thread-groups" configuration:
0001 0002 0004 0000
0002 0004 0006 0001
0003 0005 0007 0002
0002 0004 0000 0002
0004 0006 0001 0003
0005 0007
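Each record in this flat array is laid out as [property, nr_groups,
threads-per-group, <thread list>], so the array above describes two
properties, "1" (L1) and "2" (L2), each with 2 groups of 4 threads.
The following is a minimal, userspace-style sketch of how such an array
decodes (hypothetical helper name, illustrative only; the kernel instead
parses it into struct thread_groups and the per-CPU
thread_group_l2_cache_map):

#include <stddef.h>
#include <stdio.h>

/*
 * Illustrative decoder only (not the kernel implementation): walk a
 * flat "ibm,thread-groups"-style array laid out as repeated records
 * of [property, nr_groups, threads_per_group, <thread ids>] and print
 * the members of the group, for the requested property, that contains
 * @thread.
 */
static int print_group_for_thread(const unsigned int *tg, size_t len,
				  unsigned int property, unsigned int thread)
{
	size_t i = 0;

	while (i + 3 <= len) {
		unsigned int prop = tg[i];
		unsigned int nr_groups = tg[i + 1];
		unsigned int per_group = tg[i + 2];
		const unsigned int *list = &tg[i + 3];
		size_t nr_threads = (size_t)nr_groups * per_group;

		if (i + 3 + nr_threads > len)
			return -1;	/* malformed array */

		if (prop == property) {
			for (unsigned int g = 0; g < nr_groups; g++) {
				const unsigned int *grp = &list[g * per_group];

				for (unsigned int t = 0; t < per_group; t++) {
					if (grp[t] != thread)
						continue;
					printf("thread %u, property %u group:", thread, prop);
					for (unsigned int k = 0; k < per_group; k++)
						printf(" %u", grp[k]);
					printf("\n");
					return 0;
				}
			}
		}
		i += 3 + nr_threads;
	}
	return -1;	/* property or thread not found */
}

int main(void)
{
	/* The example array from above: property 1 (L1) and property 2 (L2). */
	const unsigned int tg[] = {
		1, 2, 4, 0, 2, 4, 6, 1, 3, 5, 7,	/* property 1 */
		2, 2, 4, 0, 2, 4, 6, 1, 3, 5, 7,	/* property 2 */
	};

	/* Prints: thread 1, property 2 group: 1 3 5 7 */
	return print_group_for_thread(tg, sizeof(tg) / sizeof(tg[0]), 2, 1);
}

For the array above, asking for thread 1 and property 2 yields the group
{1,3,5,7}, which is exactly the set of CPUs that ends up in CPU 1's
cpu_l2_cache_mask with this patch.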
Without this patch, the sched-domain hierarchy for CPUs 0,1 would be
CPU0 attaching sched-domain(s):
domain-0: span=0,2,4,6 level=SMT
domain-1: span=0-7 level=CACHE
domain-2: span=0-15,24-39,48-55 level=MC
domain-3: span=0-55 level=DIE
CPU1 attaching sched-domain(s):
domain-0: span=1,3,5,7 level=SMT
domain-1: span=0-7 level=CACHE
domain-2: span=0-15,24-39,48-55 level=MC
domain-3: span=0-55 level=DIE
The CACHE domain at 0-7 is incorrect since the ibm,thread-groups
sub-array
[0002 0002 0004
0000 0002 0004 0006
0001 0003 0005 0007]
indicates that L2 (Property "2") is shared only between the threads of a single
group. There are "2" groups of threads, with each group containing "4"
threads. The groups are {0,2,4,6} and {1,3,5,7}.
With this patch, the sched-domain hierarchy for CPUs 0,1 would be
CPU0 attaching sched-domain(s):
domain-0: span=0,2,4,6 level=SMT
domain-1: span=0-15,24-39,48-55 level=MC
domain-2: span=0-55 level=DIE
CPU1 attaching sched-domain(s):
domain-0: span=1,3,5,7 level=SMT
domain-1: span=0-15,24-39,48-55 level=MC
domain-2: span=0-55 level=DIE
The CACHE domain with span=0,2,4,6 for CPU 0 (span=1,3,5,7 for CPU 1
resp.) gets degenerated into the SMT domain. Furthermore, the
last-level-cache domain gets correctly set to the SMT sched-domain.
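For intuition, here is a simplified, standalone model of why the CACHE
level disappears (this is not the scheduler's actual degeneration logic,
just a bitmask comparison under the assumption that bit i stands for
CPU i): once cpu_l2_cache_mask(0) shrinks from {0-7} to {0,2,4,6}, it
equals cpu_smt_mask(0), so the CACHE level spans no CPUs beyond SMT and
is dropped.

#include <stdio.h>

int main(void)
{
	/* Bit i represents CPU i within the 8-thread core from the example. */
	unsigned int smt_mask_cpu0  = 0x55;	/* {0,2,4,6}                  */
	unsigned int l2_mask_before = 0xff;	/* {0-7}: whole core (old)    */
	unsigned int l2_mask_after  = 0x55;	/* {0,2,4,6}: L2 thread group */

	/* Before: CACHE is a strict superset of SMT, so the level is kept.  */
	printf("CACHE == SMT before: %s\n",
	       l2_mask_before == smt_mask_cpu0 ? "yes (degenerates)" : "no (kept)");
	/* After: CACHE equals SMT, so the level is redundant and dropped.   */
	printf("CACHE == SMT after:  %s\n",
	       l2_mask_after == smt_mask_cpu0 ? "yes (degenerates)" : "no (kept)");
	return 0;
}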
Signed-off-by: Gautham R. Shenoy
---
arch/powerpc/include/asm/smp.h | 2 ++
arch/powerpc/kernel/smp.c | 58 ++
2 files changed, 55 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index b2035b2..035459c 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -134,6 +134,7 @@ static inline struct cpumask *cpu_smallcore_mask(int cpu)
extern int cpu_to_core_id(int cpu);
extern bool has_big_cores;
+extern bool thread_group_shares_l2;
#define cpu_smt_mask cpu_smt_mask
#ifdef CONFIG_SCHED_SMT
@@ -187,6 +188,7 @@ static inline const struct cpumask *cpu_smt_mask(int cpu)
/* for UP */
#define hard_smp_processor_id()		get_hard_smp_processor_id(0)
#define smp_setup_cpu_maps()
+#define thread_group_shares_l2 0
static inline void inhibit_secondary_onlining(void) {}
static inline void uninhibit_secondary_onlining(void) {}
static inline const struct cpumask *cpu_sibling_mask(int cpu)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9078b5b5..2b9b1bb 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -76,6 +76,7 @@
struct task_struct *secondary_current;
bool has_big_cores;
bool coregroup_enabled;
+bool thread_group_shares_l2;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -99,6 +100,7 @@ enum {
#define MAX_THREAD_LIST_SIZE 8
#define THREAD_GROUP_SHARE_L1 1
+#define THREAD_GROUP_SHARE_L2 2
struct thread_groups {
unsigned int property;
unsigned int nr_groups;
@@ -107,7 +109,7 @@ struct thread_groups {
};
/* Maximum number of properties that groups of threads within a core can share */
-#define MAX_THREAD_GROUP_PROPERTIES 1
+#define MAX_THREAD_GROUP_PROPERTIES 2
struct thread_groups_list {
unsigned int nr_properties;
@@ -121,6 +123,13 @@ struct thread_groups_list {
*/
DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
+/*
+ * On some big-core systems, thread_group_l2_cache_map for each CPU
+ * corresponds to the set of its siblings within the core that share
+ * the L2-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+
/* SMP operations for this machine */
struct smp_ops_t *smp_ops;
@@ -718,7 +727,9 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
*
* ibm,thread-groups[i + 0] tells us the property based on which the
* threads are being grouped together. If this value is 1, it implies
- * that the threads in the same group share L1, translation cache.
+ * that the threads in the same group share L1, translation cache. If
+ * the value is 2, it implies that the threads in the same group share
+ * the same L2 cache.