Introduce a synthetic feature flag to use alternative instruction patching to NOP out all code on entry/exit paths other than those involved in NMI/#MC handling (the patching logic can't properly handle those paths yet). Having NOPs here is generally better than using conditional branches.
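As an aside, to illustrate the mechanism (a minimal sketch only: apply_one() is a made-up name, the real loop lives in apply_alternatives(), and add_nops(), text_poke() and the struct alt_instr fields are assumed to look as used here): each patch site below records itself as its own replacement with a replacement length of zero, so once X86_FEATURE_NO_XPTI is established the patcher copies nothing and fills the entire original range with NOPs.

/* Sketch of one patching step; not the literal alternative.c code. */
static void apply_one(const struct alt_instr *a)
{
    /* Offsets are stored relative to their own field. */
    u8 *orig = (u8 *)&a->instr_offset + a->instr_offset;
    const u8 *repl = (const u8 *)&a->repl_offset + a->repl_offset;
    u8 buf[MAX_PATCH_LEN];

    if ( !boot_cpu_has(a->cpuid) )
        return;                     /* feature clear: leave the site alone */

    /* The entries added below have repl == orig and repl_len == 0, ... */
    memcpy(buf, repl, a->repl_len); /* ... so this copies nothing ... */
    /* ... and the whole instr_len range gets padded with NOPs. */
    add_nops(buf + a->repl_len, a->instr_len - a->repl_len);
    text_poke(orig, buf, a->instr_len);
}

The altinstruction_entry invocations in the assembly changes below encode exactly this pattern: original == replacement, replacement length 0, gated on X86_FEATURE_NO_XPTI.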
Also raise the limit on the number of bytes we can patch in one go to what the encoding in struct alt_instr can express - there's no point in staying below that limit, and without a check in place that the limit isn't actually exceeded, such an artificial boundary is a latent risk. (A sketch of the struct's encoding follows the patch.)

Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -24,7 +24,7 @@
 #include <asm/nmi.h>
 #include <xen/livepatch.h>
 
-#define MAX_PATCH_LEN (255-1)
+#define MAX_PATCH_LEN 255
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3654,7 +3654,7 @@ long do_mmu_update(
                 rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
                                   cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
                 if ( !rc )
-                    sync_guest = this_cpu(root_pgt);
+                    sync_guest = !cpu_has_no_xpti;
                 break;
 
             case PGT_writable_page:
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -169,6 +169,9 @@ static int __init parse_smap_param(const
 }
 custom_param("smap", parse_smap_param);
 
+static int8_t __initdata opt_xpti = -1;
+boolean_param("xpti", opt_xpti);
+
 bool __read_mostly acpi_disabled;
 bool __initdata acpi_force;
 static char __initdata acpi_param[10] = "";
@@ -1540,6 +1543,13 @@ void __init noreturn __start_xen(unsigne
 
     cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS;
 
+    if ( opt_xpti < 0 )
+        opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
+    if ( opt_xpti )
+        setup_clear_cpu_cap(X86_FEATURE_NO_XPTI);
+    else
+        setup_force_cpu_cap(X86_FEATURE_NO_XPTI);
+
     if ( cpu_has_fsgsbase )
         set_in_cr4(X86_CR4_FSGSBASE);
 
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -735,8 +735,6 @@ static int clone_mapping(const void *ptr
     return 0;
 }
 
-static __read_mostly int8_t opt_xpti = -1;
-boolean_param("xpti", opt_xpti);
 DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
 
 static int setup_cpu_root_pgt(unsigned int cpu)
@@ -745,7 +743,7 @@ static int setup_cpu_root_pgt(unsigned i
     unsigned int off;
     int rc;
 
-    if ( !opt_xpti )
+    if ( cpu_has_no_xpti )
         return 0;
 
     rpt = alloc_xen_pagetable();
@@ -999,9 +997,6 @@ void __init smp_prepare_cpus(unsigned in
 
     stack_base[0] = stack_start;
 
-    if ( opt_xpti < 0 )
-        opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
-
     rc = setup_cpu_root_pgt(0);
     if ( rc )
         panic("Error %d setting up PV root page table\n", rc);
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -189,7 +189,7 @@ ENTRY(compat_post_handle_exception)
 
 /* See lstar_enter for entry register state. */
 ENTRY(cstar_enter)
-        /* sti could live here when we don't switch page tables below. */
+        ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
         CR4_PV32_RESTORE
         movq  8(%rsp),%rax /* Restore %rax. */
        movq  $FLAT_KERNEL_SS,8(%rsp)
@@ -201,6 +201,7 @@ ENTRY(cstar_enter)
         SAVE_ALL
 
         GET_STACK_END(bx)
+.Lcstar_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Lcstar_cr3_okay
@@ -210,6 +211,12 @@
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lcstar_cr3_okay:
         sti
+.Lcstar_cr3_end:
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lcstar_cr3_start, .Lcstar_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lcstar_cr3_end - .Lcstar_cr3_start), 0
+        .popsection
 
         __GET_CURRENT(bx)
         movq  VCPU_domain(%rbx),%rcx
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -39,6 +39,7 @@ restore_all_guest:
         ASSERT_INTERRUPTS_DISABLED
 
         /* Copy guest mappings and switch to per-CPU root page table. */
+.Lrag_cr3_start:
         mov   VCPU_cr3(%rbx), %r9
         GET_STACK_END(dx)
         mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi
@@ -46,7 +47,6 @@ restore_all_guest:
         movabs $DIRECTMAP_VIRT_START, %rcx
         mov   %rdi, %rax
         and   %rsi, %rdi
-        jz    .Lrag_keep_cr3
         and   %r9, %rsi
         add   %rcx, %rdi
         add   %rcx, %rsi
@@ -64,7 +64,12 @@ restore_all_guest:
         mov   STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
         write_cr3 rax, rdi, rsi
-.Lrag_keep_cr3:
+.Lrag_cr3_end:
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lrag_cr3_start, .Lrag_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lrag_cr3_end - .Lrag_cr3_start), 0
+        .popsection
 
         RESTORE_ALL
         testw $TRAP_syscall,4(%rsp)
@@ -137,7 +142,7 @@ UNLIKELY_END(exit_cr3)
  * %ss must be saved into the space left by the trampoline.
  */
 ENTRY(lstar_enter)
-        /* sti could live here when we don't switch page tables below. */
+        ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
         movq  8(%rsp),%rax /* Restore %rax. */
         movq  $FLAT_KERNEL_SS,8(%rsp)
         pushq %r11
@@ -148,6 +153,7 @@ ENTRY(lstar_enter)
         SAVE_ALL
 
         GET_STACK_END(bx)
+.Llstar_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Llstar_cr3_okay
@@ -157,6 +163,12 @@
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Llstar_cr3_okay:
         sti
+.Llstar_cr3_end:
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Llstar_cr3_start, .Llstar_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Llstar_cr3_end - .Llstar_cr3_start), 0
+        .popsection
 
         __GET_CURRENT(bx)
         testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
@@ -239,7 +251,7 @@ process_trap:
         jmp  test_all_events
 
 ENTRY(sysenter_entry)
-        /* sti could live here when we don't switch page tables below. */
+        ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
         pushq $FLAT_USER_SS
         pushq $0
         pushfq
@@ -252,6 +264,7 @@ GLOBAL(sysenter_eflags_saved)
         SAVE_ALL
 
         GET_STACK_END(bx)
+.Lsyse_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Lsyse_cr3_okay
@@ -261,6 +274,12 @@ GLOBAL(sysenter_eflags_saved)
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lsyse_cr3_okay:
         sti
+.Lsyse_cr3_end:
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lsyse_cr3_start, .Lsyse_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lsyse_cr3_end - .Lsyse_cr3_start), 0
+        .popsection
 
         __GET_CURRENT(bx)
         cmpb  $0,VCPU_sysenter_disables_events(%rbx)
@@ -299,6 +318,7 @@ ENTRY(int80_direct_trap)
         SAVE_ALL
 
         GET_STACK_END(bx)
+.Lint80_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Lint80_cr3_okay
@@ -308,6 +328,12 @@ ENTRY(int80_direct_trap)
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lint80_cr3_okay:
         sti
+.Lint80_cr3_end:
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lint80_cr3_start, .Lint80_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lint80_cr3_end - .Lint80_cr3_start), 0
+        .popsection
 
         cmpb  $0,untrusted_msi(%rip)
 UNLIKELY_START(ne, msi_check)
@@ -473,6 +499,7 @@ ENTRY(dom_crash_sync_extable)
 ENTRY(common_interrupt)
         SAVE_ALL CLAC
 
+.Lintr_cr3_start:
         GET_STACK_END(14)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
@@ -492,9 +519,20 @@ ENTRY(common_interrupt)
         CR4_PV32_RESTORE
         movq %rsp,%rdi
         callq do_IRQ
+.Lintr_cr3_restore:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+.Lintr_cr3_end:
         jmp ret_from_intr
 
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lintr_cr3_restore, .Lintr_cr3_restore, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lintr_cr3_end - .Lintr_cr3_restore), 0
+        altinstruction_entry .Lintr_cr3_start, .Lintr_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lintr_cr3_okay - .Lintr_cr3_start), 0
+        .popsection
+
 /* No special register assumptions. */
 ENTRY(ret_from_intr)
         GET_CURRENT(bx)
@@ -511,6 +549,7 @@ ENTRY(page_fault)
 GLOBAL(handle_exception)
         SAVE_ALL CLAC
 
+.Lxcpt_cr3_start:
         GET_STACK_END(14)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
@@ -592,7 +631,9 @@ handle_exception_saved:
         PERFC_INCR(exceptions, %rax, %rbx)
         mov   (%rdx, %rax, 8), %rdx
         INDIRECT_CALL %rdx
+.Lxcpt_cr3_restore1:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+.Lxcpt_cr3_end1:
         testb $3,UREGS_cs(%rsp)
         jz    restore_all_xen
         leaq  VCPU_trap_bounce(%rbx),%rdx
@@ -625,9 +666,23 @@ exception_with_ints_disabled:
         rep;  movsq                 # make room for ec/ev
 1:      movq  UREGS_error_code(%rsp),%rax # ec/ev
         movq  %rax,UREGS_kernel_sizeof(%rsp)
+.Lxcpt_cr3_restore2:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+.Lxcpt_cr3_end2:
         jmp   restore_all_xen       # return to fixup code
 
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lxcpt_cr3_restore1, .Lxcpt_cr3_restore1, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lxcpt_cr3_end1 - .Lxcpt_cr3_restore1), 0
+        altinstruction_entry .Lxcpt_cr3_restore2, .Lxcpt_cr3_restore2, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lxcpt_cr3_end2 - .Lxcpt_cr3_restore2), 0
+        altinstruction_entry .Lxcpt_cr3_start, .Lxcpt_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lxcpt_cr3_okay - .Lxcpt_cr3_start), 0
+        .popsection
+
 /* No special register assumptions. */
 FATAL_exception_with_ints_disabled:
         xorl  %esi,%esi
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -105,6 +105,7 @@
 #define cpu_has_cpuid_faulting  boot_cpu_has(X86_FEATURE_CPUID_FAULTING)
 #define cpu_has_aperfmperf      boot_cpu_has(X86_FEATURE_APERFMPERF)
 #define cpu_has_lfence_dispatch boot_cpu_has(X86_FEATURE_LFENCE_DISPATCH)
+#define cpu_has_no_xpti         boot_cpu_has(X86_FEATURE_NO_XPTI)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,
--- a/xen/include/asm-x86/cpufeatures.h
+++ b/xen/include/asm-x86/cpufeatures.h
@@ -28,3 +28,4 @@ XEN_CPUFEATURE(IND_THUNK_JMP,   (FSCAPIN
 XEN_CPUFEATURE(XEN_IBPB,        (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */
 XEN_CPUFEATURE(XEN_IBRS_SET,    (FSCAPINTS+0)*32+16) /* IBRSB && IRBS set in Xen */
 XEN_CPUFEATURE(XEN_IBRS_CLEAR,  (FSCAPINTS+0)*32+17) /* IBRSB && IBRS clear in Xen */
+XEN_CPUFEATURE(NO_XPTI,         (FSCAPINTS+0)*32+18) /* XPTI mitigation not in use */
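P.S.: For reference on the MAX_PATCH_LEN change, here is a sketch of struct alt_instr's encoding (field names as I believe they are in this tree; treat this as illustrative rather than authoritative):

struct alt_instr {
    s32 instr_offset;   /* original instruction, relative to this field */
    s32 repl_offset;    /* replacement instruction, likewise relative */
    u16 cpuid;          /* feature bit gating the replacement */
    u8  instr_len;      /* length of the original site */
    u8  repl_len;       /* length of the replacement */
};

With the lengths held in single bytes, nothing larger than 255 bytes can be expressed in the first place, so any smaller MAX_PATCH_LEN is purely artificial.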