Instead of switching XPTI globally on or off, add a per-domain flag for that purpose. This allows the xpti boot parameter to be modified to support running dom0 without Meltdown mitigations. Using "xpti=dom0=false" as boot parameter will achieve that.
Move the xpti boot parameter handling from xen/arch/x86/setup.c to xen/arch/x86/spec_ctrl.c, and initialise the per-domain flag in xen/arch/x86/pv/domain.c as it is pv-domain specific. Signed-off-by: Juergen Gross <jgr...@suse.com> Reviewed-by: Jan Beulich <jbeul...@suse.com> --- V6.1: - address some minor comments (Jan Beulich) V6: - modify xpti boot parameter options (Andrew Cooper) - move xpti_init() code to spec_ctrl.c (Andrew Cooper) - rework init of per-domain xpti flag (Andrew Cooper) V3: - latch get_cpu_info() return value in variable (Jan Beulich) - always call xpti_domain_init() for pv dom0 (Jan Beulich) - add __init annotations (Jan Beulich) - drop per domain XPTI message (Jan Beulich) - document xpti=default support (Jan Beulich) - move domain xpti flag into a padding hole (Jan Beulich) --- docs/misc/xen-command-line.markdown | 14 ++++++-- xen/arch/x86/mm.c | 17 +++++++-- xen/arch/x86/pv/dom0_build.c | 1 + xen/arch/x86/pv/domain.c | 6 ++++ xen/arch/x86/setup.c | 19 ---------- xen/arch/x86/smpboot.c | 4 +-- xen/arch/x86/spec_ctrl.c | 70 +++++++++++++++++++++++++++++++++++++ xen/include/asm-x86/current.h | 3 +- xen/include/asm-x86/domain.h | 3 ++ xen/include/asm-x86/spec_ctrl.h | 4 +++ 10 files changed, 115 insertions(+), 26 deletions(-) diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown index b353352adf..d4f758487a 100644 --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -1955,14 +1955,24 @@ clustered mode. The default, given no hint from the **FADT**, is cluster mode. ### xpti -> `= <boolean>` +> `= List of [ default | <boolean> | dom0=<bool> | domu=<bool> ]` -> Default: `false` on AMD hardware +> Default: `false` on hardware not vulnerable to Meltdown (e.g. AMD) > Default: `true` everywhere else Override default selection of whether to isolate 64-bit PV guest page tables. +`true` activates page table isolation even on hardware not vulnerable by +Meltdown for all domains. + +`false` deactivates page table isolation on all systems for all domains. 
+ +`default` sets the default behaviour. + +With `dom0` and `domu` it is possible to control page table isolation +for dom0 or guest domains only. + ### xsave > `= <boolean>` diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index e245d96a97..9c36614099 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -502,8 +502,21 @@ void make_cr3(struct vcpu *v, mfn_t mfn) void write_ptbase(struct vcpu *v) { - get_cpu_info()->root_pgt_changed = true; - switch_cr3(v->arch.cr3); + struct cpu_info *cpu_info = get_cpu_info(); + + if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.xpti ) + { + cpu_info->root_pgt_changed = true; + cpu_info->pv_cr3 = __pa(this_cpu(root_pgt)); + switch_cr3(v->arch.cr3); + } + else + { + /* Make sure to clear xen_cr3 before pv_cr3; switch_cr3() serializes. */ + cpu_info->xen_cr3 = 0; + switch_cr3(v->arch.cr3); + cpu_info->pv_cr3 = 0; + } } /* diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c index 5b4325b87f..d148395919 100644 --- a/xen/arch/x86/pv/dom0_build.c +++ b/xen/arch/x86/pv/dom0_build.c @@ -387,6 +387,7 @@ int __init dom0_construct_pv(struct domain *d, if ( compat32 ) { d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1; + d->arch.pv_domain.xpti = false; v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0]; if ( setup_compat_arg_xlat(v) != 0 ) BUG(); diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c index be40843b05..ce1a1a9d35 100644 --- a/xen/arch/x86/pv/domain.c +++ b/xen/arch/x86/pv/domain.c @@ -9,6 +9,7 @@ #include <xen/lib.h> #include <xen/sched.h> +#include <asm/spec_ctrl.h> #include <asm/pv/domain.h> static void noreturn continue_nonidle_domain(struct vcpu *v) @@ -75,6 +76,8 @@ int switch_compat(struct domain *d) d->arch.x87_fip_width = 4; + d->arch.pv_domain.xpti = false; + return 0; undo_and_fail: @@ -205,6 +208,9 @@ int pv_domain_initialise(struct domain *d) /* 64-bit PV guest by default. 
*/ d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0; + d->arch.pv_domain.xpti = opt_xpti & (is_hardware_domain(d) + ? OPT_XPTI_DOM0 : OPT_XPTI_DOMU); + return 0; fail: diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index b521db25a8..887d75a981 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -169,9 +169,6 @@ static int __init parse_smap_param(const char *s) } custom_param("smap", parse_smap_param); -static int8_t __initdata opt_xpti = -1; -boolean_param("xpti", opt_xpti); - bool __read_mostly acpi_disabled; bool __initdata acpi_force; static char __initdata acpi_param[10] = ""; @@ -1546,22 +1543,6 @@ void __init noreturn __start_xen(unsigned long mbi_p) cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS; - if ( opt_xpti < 0 ) - { - uint64_t caps = 0; - - if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) - caps = ARCH_CAPABILITIES_RDCL_NO; - else if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) ) - rdmsrl(MSR_ARCH_CAPABILITIES, caps); - - opt_xpti = !(caps & ARCH_CAPABILITIES_RDCL_NO); - } - if ( opt_xpti ) - setup_clear_cpu_cap(X86_FEATURE_NO_XPTI); - else - setup_force_cpu_cap(X86_FEATURE_NO_XPTI); - if ( cpu_has_fsgsbase ) set_in_cr4(X86_CR4_FSGSBASE); diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index e7f5504ec3..b4cfb01fd9 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -325,7 +325,7 @@ void start_secondary(void *unused) spin_debug_disable(); get_cpu_info()->xen_cr3 = 0; - get_cpu_info()->pv_cr3 = this_cpu(root_pgt) ? 
__pa(this_cpu(root_pgt)) : 0; + get_cpu_info()->pv_cr3 = 0; load_system_tables(); @@ -1044,7 +1044,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) panic("Error %d setting up PV root page table\n", rc); if ( per_cpu(root_pgt, 0) ) { - get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0)); + get_cpu_info()->pv_cr3 = 0; /* * All entry points which may need to switch page tables have to start diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c index 5b5ec90fd8..2300e9eba9 100644 --- a/xen/arch/x86/spec_ctrl.c +++ b/xen/arch/x86/spec_ctrl.c @@ -193,6 +193,70 @@ static bool __init retpoline_safe(void) } } +#define OPT_XPTI_DEFAULT 0xff +uint8_t __read_mostly opt_xpti = OPT_XPTI_DEFAULT; + +static __init void xpti_init_default(bool force) +{ + uint64_t caps = 0; + + if ( !force && (opt_xpti != OPT_XPTI_DEFAULT) ) + return; + + if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) + caps = ARCH_CAPABILITIES_RDCL_NO; + else if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) ) + rdmsrl(MSR_ARCH_CAPABILITIES, caps); + + if ( caps & ARCH_CAPABILITIES_RDCL_NO ) + opt_xpti = 0; + else + opt_xpti = OPT_XPTI_DOM0 | OPT_XPTI_DOMU; +} + +static __init int parse_xpti(const char *s) +{ + const char *ss; + int val, rc = 0; + + xpti_init_default(false); + + do { + ss = strchr(s, ','); + if ( !ss ) + ss = strchr(s, '\0'); + + switch ( parse_bool(s, ss) ) + { + case 0: + opt_xpti = 0; + break; + + case 1: + opt_xpti = OPT_XPTI_DOM0 | OPT_XPTI_DOMU; + break; + + default: + if ( !strcmp(s, "default") ) + xpti_init_default(true); + else if ( (val = parse_boolean("dom0", s, ss)) >= 0 ) + opt_xpti = (opt_xpti & ~OPT_XPTI_DOM0) | + (val ? OPT_XPTI_DOM0 : 0); + else if ( (val = parse_boolean("domu", s, ss)) >= 0 ) + opt_xpti = (opt_xpti & ~OPT_XPTI_DOMU) | + (val ? 
OPT_XPTI_DOMU : 0); + else + rc = -EINVAL; + break; + } + + s = ss + 1; + } while ( *ss ); + + return rc; +} +custom_param("xpti", parse_xpti); + void __init init_speculation_mitigations(void) { enum ind_thunk thunk = THUNK_DEFAULT; @@ -310,6 +374,12 @@ void __init init_speculation_mitigations(void) /* (Re)init BSP state now that default_bti_ist_info has been calculated. */ init_shadow_spec_ctrl_state(); + xpti_init_default(false); + if ( opt_xpti == 0 ) + setup_force_cpu_cap(X86_FEATURE_NO_XPTI); + else + setup_clear_cpu_cap(X86_FEATURE_NO_XPTI); + print_details(thunk); } diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h index f2491b4423..b2475783f8 100644 --- a/xen/include/asm-x86/current.h +++ b/xen/include/asm-x86/current.h @@ -44,7 +44,8 @@ struct cpu_info { /* * Of the two following fields the latter is being set to the CR3 value * to be used on the given pCPU for loading whenever 64-bit PV guest - * context is being entered. The value never changes once set. + * context is being entered. A value of zero indicates no setting of CR3 + * is to be performed. * The former is the value to restore when re-entering Xen, if any. IOW * its value being zero means there's nothing to restore. However, its * value can also be negative, indicating to the exit-to-Xen code that diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h index a12ae47f1b..ed4199931a 100644 --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -253,6 +253,9 @@ struct pv_domain atomic_t nr_l4_pages; + /* XPTI active? */ + bool xpti; + /* map_domain_page() mapping cache. 
*/ struct mapcache_domain mapcache; diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h index 5ab4ff3f68..b4fa43269e 100644 --- a/xen/include/asm-x86/spec_ctrl.h +++ b/xen/include/asm-x86/spec_ctrl.h @@ -29,6 +29,10 @@ void init_speculation_mitigations(void); extern bool opt_ibpb; extern uint8_t default_bti_ist_info; +extern uint8_t opt_xpti; +#define OPT_XPTI_DOM0 0x01 +#define OPT_XPTI_DOMU 0x02 + static inline void init_shadow_spec_ctrl_state(void) { struct cpu_info *info = get_cpu_info(); -- 2.13.6 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel