On Thu, Nov 12, 2020 at 08:40:05AM -0500, Joel Fernandes wrote:
> On Wed, Nov 11, 2020 at 11:29:37PM +0100, Alexander Graf wrote:
> > 
> > On 11.11.20 23:15, Joel Fernandes wrote:
> > > On Wed, Nov 11, 2020 at 5:13 PM Joel Fernandes <j...@joelfernandes.org> wrote:
> > > > On Wed, Nov 11, 2020 at 5:00 PM Alexander Graf <g...@amazon.com> wrote:
> > > > > On 11.11.20 22:14, Joel Fernandes wrote:
> > > > > > > Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
> > > > > > > issues. Detect this and don't enable core scheduling as it can
> > > > > > > needlessly slow the device down.
> > > > > > >
> > > > > > > diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> > > > > > > index dece79e4d1e9..0e6e61e49b23 100644
> > > > > > > --- a/arch/x86/kernel/cpu/bugs.c
> > > > > > > +++ b/arch/x86/kernel/cpu/bugs.c
> > > > > > > @@ -152,6 +152,14 @@ void __init check_bugs(void)
> > > > > > >  #endif
> > > > > > >  }
> > > > > > >
> > > > > > > +/*
> > > > > > > + * Do not need core scheduling if CPU does not have MDS/L1TF vulnerability.
> > > > > > > + */
> > > > > > > +int arch_allow_core_sched(void)
> > > > > > > +{
> > > > > > > +	return boot_cpu_has_bug(X86_BUG_MDS) || boot_cpu_has_bug(X86_BUG_L1TF);
> > > > >
> > > > > Can we make this more generic and user settable, similar to the L1 cache
> > > > > flushing modes in KVM?
> > > > >
> > > > > I am not 100% convinced that there are no other thread sibling attacks
> > > > > possible without MDS and L1TF. If I'm paranoid, I want to still be able
> > > > > to force enable core scheduling.
> > > > >
> > > > > In addition, we are also using core scheduling as a poor man's mechanism
> > > > > to give customers consistent performance for virtual machine thread
> > > > > siblings. This is important irrespective of CPU bugs. In such a
> > > > > scenario, I want to force enable core scheduling.
> > > >
> > > > Ok, I can make it a new kernel command line option with:
> > > > coresched=on
> > > > coresched=secure (only if HW has MDS/L1TF)
> > > > coresched=off
> > >
> > > Also, I would keep "secure" as the default. (And probably, we should
> > > modify the informational messages in sysfs to reflect this.)
> > 
> > I agree that "secure" should be the default.
> 
> Ok.
Something like so then:

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..3c2457d47f54 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -152,6 +152,21 @@ void __init check_bugs(void)
 #endif
 }
 
+/*
+ * When coresched=secure, coresched is not needed if the CPU lacks the MDS/L1TF bugs.
+ */
+int arch_allow_core_sched(void)
+{
+	/*
+	 * x86: Disallow coresched if it is in secure mode and the CPU does
+	 * not have the vulnerabilities.
+	 */
+	if (coresched_cmd_secure())
+		return boot_cpu_has_bug(X86_BUG_MDS) || boot_cpu_has_bug(X86_BUG_L1TF);
+	else
+		return true;
+}
+
 void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 {
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d6428aaf67e7..1be5cf85a4a6 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -228,4 +228,7 @@ static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0;
 extern bool cpu_mitigations_off(void);
 extern bool cpu_mitigations_auto_nosmt(void);
 
+extern bool coresched_cmd_off(void);
+extern bool coresched_cmd_secure(void);
+
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ff2578ecf17..674edf534cc5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2552,3 +2552,46 @@ bool cpu_mitigations_auto_nosmt(void)
 	return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
 }
 EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
+
+/*
+ * These are used for a global "coresched=" cmdline option for controlling
+ * core scheduling. Note that core sched may be needed for usecases other
+ * than security as well.
+ */
+enum coresched_cmds {
+	CORE_SCHED_OFF,
+	CORE_SCHED_SECURE,
+	CORE_SCHED_ON,
+};
+
+static enum coresched_cmds coresched_cmd __ro_after_init = CORE_SCHED_SECURE;
+
+static int __init coresched_parse_cmdline(char *arg)
+{
+	if (!strcmp(arg, "off"))
+		coresched_cmd = CORE_SCHED_OFF;
+	else if (!strcmp(arg, "on"))
+		coresched_cmd = CORE_SCHED_ON;
+	else if (!strcmp(arg, "secure"))
+		coresched_cmd = CORE_SCHED_SECURE;
+	else
+		pr_crit("Unsupported coresched=%s, defaulting to secure.\n",
+			arg);
+
+	return 0;
+}
+early_param("coresched", coresched_parse_cmdline);
+
+/* coresched=off */
+bool coresched_cmd_off(void)
+{
+	return coresched_cmd == CORE_SCHED_OFF;
+}
+EXPORT_SYMBOL_GPL(coresched_cmd_off);
+
+/* coresched=secure */
+bool coresched_cmd_secure(void)
+{
+	return coresched_cmd == CORE_SCHED_SECURE;
+}
+EXPORT_SYMBOL_GPL(coresched_cmd_secure);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5ed26b469ed6..6f586d221ddb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -333,8 +333,23 @@ static void __sched_core_disable(void)
 	printk("core sched disabled\n");
 }
 
+static bool __coresched_supported(void)
+{
+	/* coresched=off command line option. */
+	if (coresched_cmd_off())
+		return false;
+
+	/*
+	 * Some archs may not need coresched; e.g. x86 does not need it when
+	 * coresched=secure (the default) is passed and the CPU has no bugs.
+	 */
+	return arch_allow_core_sched();
+}
+
 void sched_core_get(void)
 {
+	if (!__coresched_supported())
+		return;
 	mutex_lock(&sched_core_mutex);
 	if (!sched_core_count++)
 		__sched_core_enable();
@@ -343,6 +358,8 @@ void sched_core_get(void)
 
 void sched_core_put(void)
 {
+	if (!__coresched_supported())
+		return;
 	mutex_lock(&sched_core_mutex);
 	if (!--sched_core_count)
 		__sched_core_disable();
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ada56d8ce56f..20d2aa53336e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1206,6 +1206,11 @@ int cpu_core_tag_color_write_u64(struct cgroup_subsys_state *css,
 
 bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi);
 
+int __weak arch_allow_core_sched(void)
+{
+	return true;
+}
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline bool sched_core_enqueued(struct task_struct *task) { return false; }
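For reference, the whole thing reduces to a three-way gate composed with
the arch hook. Below is an untested userspace sketch of the resulting
decision table; coresched_supported() and the "vulnerable" flag here are
stand-ins for __coresched_supported() and boot_cpu_has_bug(), not kernel
code:

/* cc -o coresched-sim coresched-sim.c && ./coresched-sim */
#include <stdbool.h>
#include <stdio.h>

enum coresched_cmds { CORE_SCHED_OFF, CORE_SCHED_SECURE, CORE_SCHED_ON };

/* off always wins, on always allows, secure defers to the arch, which
 * only allows core scheduling on vulnerable CPUs. */
static bool coresched_supported(enum coresched_cmds cmd, bool vulnerable)
{
	if (cmd == CORE_SCHED_OFF)
		return false;
	if (cmd == CORE_SCHED_SECURE)	/* arch_allow_core_sched() on x86 */
		return vulnerable;
	return true;			/* CORE_SCHED_ON */
}

int main(void)
{
	static const char * const names[] = { "off", "secure", "on" };
	int cmd, bug;

	for (cmd = CORE_SCHED_OFF; cmd <= CORE_SCHED_ON; cmd++)
		for (bug = 0; bug <= 1; bug++)
			printf("coresched=%-6s vulnerable=%d -> %s\n",
			       names[cmd], bug,
			       coresched_supported(cmd, bug) ?
			       "enabled" : "disabled");
	return 0;
}

That gives the intended matrix: =off never enables, =on always does, and
the default =secure only enables on CPUs that actually have MDS/L1TF, so
unaffected AMD parts skip the overhead while you can still force it on
for the consistent-VM-performance case.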
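And in case the sched.h hunk looks odd: the fallback relies on plain
weak-symbol override, i.e. an arch that provides a strong
arch_allow_core_sched() replaces the __weak default at link time. A
minimal two-file illustration of the mechanism (file names made up):

/* default.c - generic fallback, analogous to the sched.h definition */
int __attribute__((weak)) arch_allow_core_sched(void)
{
	return 1;	/* no arch opinion: allow core scheduling */
}

/* x86.c - strong override, analogous to the bugs.c definition; when
 * linked in, the linker picks this over the weak default. */
int arch_allow_core_sched(void)
{
	return 0;	/* pretend the CPU has no MDS/L1TF */
}

/* main.c - prints 0 with `cc main.c default.c x86.c`, and 1 if x86.c
 * is left out of the link. */
#include <stdio.h>
int arch_allow_core_sched(void);
int main(void)
{
	printf("%d\n", arch_allow_core_sched());
	return 0;
}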