Adds support for IA32_PQR_ASSOC MSR writes during task scheduling.
For Cache Allocation, MSR write would let the task fill in the cache
'subset' represented by the cgroup's cache_mask.

The high 32 bits of the per-processor MSR IA32_PQR_ASSOC represent the
CLOSid. During context switch, the kernel implements this by writing the
CLOSid of the cgroup to which the task belongs into the CPU's
IA32_PQR_ASSOC MSR.

The following considerations are made for the PQR MSR write so that it
minimally impacts the scheduler hot path:
- This path does not exist on any non-Intel platforms.
- On Intel platforms, this would not exist by default unless CGROUP_RDT
is enabled.
- It remains a no-op when CGROUP_RDT is enabled but the Intel SKU does
not support the feature.
- When the feature is available and enabled, no MSR write is done until
the user manually creates a cgroup directory *and* assigns it a
cache_mask different from the root cgroup's. Since a child node inherits
its parent's cache mask, cgroup creation alone causes no scheduling
hot path impact.
- The MSR write is only done when a task with a different CLOSid is
scheduled on the CPU. Typically, if the task groups are bound to be
scheduled on a set of CPUs, the number of MSR writes is greatly
reduced.
- For cgroup directories having the same cache_mask, CLOSids are reused.
This minimizes the number of CLOSids used and hence reduces the MSR
write frequency.

Signed-off-by: Vikas Shivappa <vikas.shiva...@linux.intel.com>
---
 arch/x86/include/asm/intel_rdt.h | 44 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/switch_to.h |  3 +++
 arch/x86/kernel/cpu/intel_rdt.c  | 30 +++++++++++++++++++++++++++
 3 files changed, 77 insertions(+)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 9e9dbbe..589394b 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,9 +4,15 @@
 #ifdef CONFIG_CGROUP_RDT
 
 #include <linux/cgroup.h>
+
+#define MSR_IA32_PQR_ASSOC             0xc8f
 #define MAX_CBM_LENGTH                 32
 #define IA32_L3_CBM_BASE               0xc90
 #define CBM_FROM_INDEX(x)              (IA32_L3_CBM_BASE + x)
+DECLARE_PER_CPU(unsigned int, x86_cpu_clos);
+extern struct static_key rdt_enable_key;
+extern void __rdt_sched_in(void);
+
 
 struct rdt_subsys_info {
        /* Clos Bitmap to keep track of available CLOSids.*/
@@ -24,6 +30,11 @@ struct clos_cbm_map {
        unsigned int clos_refcnt;
 };
 
+static inline bool rdt_enabled(void)
+{
+       return static_key_false(&rdt_enable_key);
+}
+
 /*
  * Return rdt group corresponding to this container.
  */
@@ -37,5 +48,38 @@ static inline struct intel_rdt *parent_rdt(struct intel_rdt *ir)
        return css_rdt(ir->css.parent);
 }
 
+/*
+ * Return rdt group to which this task belongs.
+ */
+static inline struct intel_rdt *task_rdt(struct task_struct *task)
+{
+       return css_rdt(task_css(task, intel_rdt_cgrp_id));
+}
+
+/*
+ * intel_rdt_sched_in() - Writes the task's CLOSid to IA32_PQR_MSR
+ *
+ * Following considerations are made so that this has minimal impact
+ * on scheduler hot path:
+ * - This will stay as no-op unless we are running on an Intel SKU
+ * which supports L3 cache allocation.
+ * - When support is present and enabled, does not do any
+ * IA32_PQR_MSR writes until the user starts really using the feature
+ * i.e. creates an rdt cgroup directory and assigns a cache_mask that is
+ * different from the root cgroup's cache_mask.
+ * - CLOSids are allocated so that different cgroup directories
+ * with the same cache_mask get the same CLOSid. This minimizes CLOSids
+ * used and reduces MSR write frequency.
+ */
+static inline void intel_rdt_sched_in(void)
+{
+       if (rdt_enabled())
+               __rdt_sched_in();
+}
+
+#else
+
+static inline void intel_rdt_sched_in(void) {}
+
 #endif
 #endif
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 751bf4b..9149577 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -8,6 +8,9 @@ struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
                      struct tss_struct *tss);
 
+#include <asm/intel_rdt.h>
+#define finish_arch_switch(prev)       intel_rdt_sched_in()
+
 #ifdef CONFIG_X86_32
 
 #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 125318d..fe3ce4e 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -35,6 +35,8 @@ static struct clos_cbm_map *ccmap;
 static struct rdt_subsys_info rdtss_info;
 static DEFINE_MUTEX(rdt_group_mutex);
 struct intel_rdt rdt_root_group;
+struct static_key __read_mostly rdt_enable_key = STATIC_KEY_INIT_FALSE;
+DEFINE_PER_CPU(unsigned int, x86_cpu_clos);
 
 /*
  * Mask of CPUs for writing CBM values. We only need one per-socket.
@@ -79,6 +81,33 @@ static void intel_rdt_free_closid(unsigned int clos)
        clear_bit(clos, rdtss_info.closmap);
 }
 
+void __rdt_sched_in(void)
+{
+       struct task_struct *task = current;
+       struct intel_rdt *ir;
+       unsigned int clos;
+
+       /*
+        * This needs to be fixed
+        * to cache the whole PQR instead of just CLOSid.
+        * PQR has closid in high 32 bits and CQM-RMID in low 10 bits.
+        * Should not write a 0 to the low 10 bits of PQR
+        * and corrupt RMID.
+        */
+       clos = this_cpu_read(x86_cpu_clos);
+
+       rcu_read_lock();
+       ir = task_rdt(task);
+       if (ir->clos == clos) {
+               rcu_read_unlock();
+               return;
+       }
+
+       wrmsr(MSR_IA32_PQR_ASSOC, 0, ir->clos);
+       this_cpu_write(x86_cpu_clos, ir->clos);
+       rcu_read_unlock();
+}
+
 static void __clos_get(unsigned int closid)
 {
        struct clos_cbm_map *ccm = &ccmap[closid];
@@ -433,6 +462,7 @@ static int __init intel_rdt_late_init(void)
        __hotcpu_notifier(intel_rdt_cpu_notifier, 0);
 
        cpu_notifier_register_done();
+       static_key_slow_inc(&rdt_enable_key);
 
        pr_info("Max bitmask length:%u,Max ClosIds: %u\n", max_cbm_len, maxid);
 out_err:
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to