Introduce a synthetic feature flag to use alternative instruction
patching to NOP out all code on entry/exit paths other than those
involved in NMI/#MC handling (the patching logic can't properly handle
those paths yet). Having NOPs here is generally better than using
conditional branches.

Also change the limit on the number of bytes we can patch in one go to
that resulting from the encoding in struct alt_instr: there's no point
reducing it below that limit, and as long as no check is in place to
ensure the limit isn't actually exceeded, such an artificial boundary
is a latent risk.

Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -24,7 +24,7 @@
 #include <asm/nmi.h>
 #include <xen/livepatch.h>
 
-#define MAX_PATCH_LEN (255-1)
+#define MAX_PATCH_LEN 255
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3654,7 +3654,7 @@ long do_mmu_update(
                     rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
                                       cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
                     if ( !rc )
-                        sync_guest = this_cpu(root_pgt);
+                        sync_guest = !cpu_has_no_xpti;
                     break;
 
                 case PGT_writable_page:
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -169,6 +169,9 @@ static int __init parse_smap_param(const
 }
 custom_param("smap", parse_smap_param);
 
+static int8_t __initdata opt_xpti = -1;
+boolean_param("xpti", opt_xpti);
+
 bool __read_mostly acpi_disabled;
 bool __initdata acpi_force;
 static char __initdata acpi_param[10] = "";
@@ -1540,6 +1543,13 @@ void __init noreturn __start_xen(unsigne
 
     cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS;
 
+    if ( opt_xpti < 0 )
+        opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
+    if ( opt_xpti )
+        setup_clear_cpu_cap(X86_FEATURE_NO_XPTI);
+    else
+        setup_force_cpu_cap(X86_FEATURE_NO_XPTI);
+
     if ( cpu_has_fsgsbase )
         set_in_cr4(X86_CR4_FSGSBASE);
 
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -735,8 +735,6 @@ static int clone_mapping(const void *ptr
     return 0;
 }
 
-static __read_mostly int8_t opt_xpti = -1;
-boolean_param("xpti", opt_xpti);
 DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
 
 static int setup_cpu_root_pgt(unsigned int cpu)
@@ -745,7 +743,7 @@ static int setup_cpu_root_pgt(unsigned i
     unsigned int off;
     int rc;
 
-    if ( !opt_xpti )
+    if ( cpu_has_no_xpti )
         return 0;
 
     rpt = alloc_xen_pagetable();
@@ -999,9 +997,6 @@ void __init smp_prepare_cpus(unsigned in
 
     stack_base[0] = stack_start;
 
-    if ( opt_xpti < 0 )
-        opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
-
     rc = setup_cpu_root_pgt(0);
     if ( rc )
         panic("Error %d setting up PV root page table\n", rc);
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -189,7 +189,7 @@ ENTRY(compat_post_handle_exception)
 
 /* See lstar_enter for entry register state. */
 ENTRY(cstar_enter)
-        /* sti could live here when we don't switch page tables below. */
+        ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
         CR4_PV32_RESTORE
         movq  8(%rsp),%rax /* Restore %rax. */
         movq  $FLAT_KERNEL_SS,8(%rsp)
@@ -201,6 +201,7 @@ ENTRY(cstar_enter)
         SAVE_ALL
 
         GET_STACK_END(bx)
+.Lcstar_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Lcstar_cr3_okay
@@ -210,6 +211,12 @@ ENTRY(cstar_enter)
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lcstar_cr3_okay:
         sti
+.Lcstar_cr3_end:
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lcstar_cr3_start, .Lcstar_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lcstar_cr3_end - .Lcstar_cr3_start), 0
+        .popsection
 
         __GET_CURRENT(bx)
         movq  VCPU_domain(%rbx),%rcx
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -39,6 +39,7 @@ restore_all_guest:
         ASSERT_INTERRUPTS_DISABLED
 
         /* Copy guest mappings and switch to per-CPU root page table. */
+.Lrag_cr3_start:
         mov   VCPU_cr3(%rbx), %r9
         GET_STACK_END(dx)
         mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi
@@ -46,7 +47,6 @@ restore_all_guest:
         movabs $DIRECTMAP_VIRT_START, %rcx
         mov   %rdi, %rax
         and   %rsi, %rdi
-        jz    .Lrag_keep_cr3
         and   %r9, %rsi
         add   %rcx, %rdi
         add   %rcx, %rsi
@@ -64,7 +64,12 @@ restore_all_guest:
         mov   STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
         write_cr3 rax, rdi, rsi
-.Lrag_keep_cr3:
+.Lrag_cr3_end:
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lrag_cr3_start, .Lrag_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lrag_cr3_end - .Lrag_cr3_start), 0
+        .popsection
 
         RESTORE_ALL
         testw $TRAP_syscall,4(%rsp)
@@ -137,7 +142,7 @@ UNLIKELY_END(exit_cr3)
  * %ss must be saved into the space left by the trampoline.
  */
 ENTRY(lstar_enter)
-        /* sti could live here when we don't switch page tables below. */
+        ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
         movq  8(%rsp),%rax /* Restore %rax. */
         movq  $FLAT_KERNEL_SS,8(%rsp)
         pushq %r11
@@ -148,6 +153,7 @@ ENTRY(lstar_enter)
         SAVE_ALL
 
         GET_STACK_END(bx)
+.Llstar_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Llstar_cr3_okay
@@ -157,6 +163,12 @@ ENTRY(lstar_enter)
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Llstar_cr3_okay:
         sti
+.Llstar_cr3_end:
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Llstar_cr3_start, .Llstar_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Llstar_cr3_end - .Llstar_cr3_start), 0
+        .popsection
 
         __GET_CURRENT(bx)
         testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
@@ -239,7 +251,7 @@ process_trap:
         jmp  test_all_events
 
 ENTRY(sysenter_entry)
-        /* sti could live here when we don't switch page tables below. */
+        ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
         pushq $FLAT_USER_SS
         pushq $0
         pushfq
@@ -252,6 +264,7 @@ GLOBAL(sysenter_eflags_saved)
         SAVE_ALL
 
         GET_STACK_END(bx)
+.Lsyse_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Lsyse_cr3_okay
@@ -261,6 +274,12 @@ GLOBAL(sysenter_eflags_saved)
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lsyse_cr3_okay:
         sti
+.Lsyse_cr3_end:
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lsyse_cr3_start, .Lsyse_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lsyse_cr3_end - .Lsyse_cr3_start), 0
+        .popsection
 
         __GET_CURRENT(bx)
         cmpb  $0,VCPU_sysenter_disables_events(%rbx)
@@ -299,6 +318,7 @@ ENTRY(int80_direct_trap)
         SAVE_ALL
 
         GET_STACK_END(bx)
+.Lint80_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Lint80_cr3_okay
@@ -308,6 +328,12 @@ ENTRY(int80_direct_trap)
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lint80_cr3_okay:
         sti
+.Lint80_cr3_end:
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lint80_cr3_start, .Lint80_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lint80_cr3_end - .Lint80_cr3_start), 0
+        .popsection
 
         cmpb  $0,untrusted_msi(%rip)
 UNLIKELY_START(ne, msi_check)
@@ -473,6 +499,7 @@ ENTRY(dom_crash_sync_extable)
 ENTRY(common_interrupt)
         SAVE_ALL CLAC
 
+.Lintr_cr3_start:
         GET_STACK_END(14)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
@@ -492,9 +519,20 @@ ENTRY(common_interrupt)
         CR4_PV32_RESTORE
         movq %rsp,%rdi
         callq do_IRQ
+.Lintr_cr3_restore:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+.Lintr_cr3_end:
         jmp ret_from_intr
 
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lintr_cr3_restore, .Lintr_cr3_restore, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lintr_cr3_end - .Lintr_cr3_restore), 0
+        altinstruction_entry .Lintr_cr3_start, .Lintr_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lintr_cr3_okay - .Lintr_cr3_start), 0
+        .popsection
+
 /* No special register assumptions. */
 ENTRY(ret_from_intr)
         GET_CURRENT(bx)
@@ -511,6 +549,7 @@ ENTRY(page_fault)
 GLOBAL(handle_exception)
         SAVE_ALL CLAC
 
+.Lxcpt_cr3_start:
         GET_STACK_END(14)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
@@ -592,7 +631,9 @@ handle_exception_saved:
         PERFC_INCR(exceptions, %rax, %rbx)
         mov   (%rdx, %rax, 8), %rdx
         INDIRECT_CALL %rdx
+.Lxcpt_cr3_restore1:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+.Lxcpt_cr3_end1:
         testb $3,UREGS_cs(%rsp)
         jz    restore_all_xen
         leaq  VCPU_trap_bounce(%rbx),%rdx
@@ -625,9 +666,23 @@ exception_with_ints_disabled:
         rep;  movsq                     # make room for ec/ev
 1:      movq  UREGS_error_code(%rsp),%rax # ec/ev
         movq  %rax,UREGS_kernel_sizeof(%rsp)
+.Lxcpt_cr3_restore2:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+.Lxcpt_cr3_end2:
         jmp   restore_all_xen           # return to fixup code
 
+        .pushsection .altinstructions, "a", @progbits
+        altinstruction_entry .Lxcpt_cr3_restore1, .Lxcpt_cr3_restore1, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lxcpt_cr3_end1 - .Lxcpt_cr3_restore1), 0
+        altinstruction_entry .Lxcpt_cr3_restore2, .Lxcpt_cr3_restore2, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lxcpt_cr3_end2 - .Lxcpt_cr3_restore2), 0
+        altinstruction_entry .Lxcpt_cr3_start, .Lxcpt_cr3_start, \
+                             X86_FEATURE_NO_XPTI, \
+                             (.Lxcpt_cr3_okay - .Lxcpt_cr3_start), 0
+        .popsection
+
 /* No special register assumptions. */
 FATAL_exception_with_ints_disabled:
         xorl  %esi,%esi
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -105,6 +105,7 @@
 #define cpu_has_cpuid_faulting  boot_cpu_has(X86_FEATURE_CPUID_FAULTING)
 #define cpu_has_aperfmperf      boot_cpu_has(X86_FEATURE_APERFMPERF)
 #define cpu_has_lfence_dispatch boot_cpu_has(X86_FEATURE_LFENCE_DISPATCH)
+#define cpu_has_no_xpti         boot_cpu_has(X86_FEATURE_NO_XPTI)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,
--- a/xen/include/asm-x86/cpufeatures.h
+++ b/xen/include/asm-x86/cpufeatures.h
@@ -28,3 +28,4 @@ XEN_CPUFEATURE(IND_THUNK_JMP,   (FSCAPIN
 XEN_CPUFEATURE(XEN_IBPB,        (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */
 XEN_CPUFEATURE(XEN_IBRS_SET,    (FSCAPINTS+0)*32+16) /* IBRSB && IRBS set in Xen */
 XEN_CPUFEATURE(XEN_IBRS_CLEAR,  (FSCAPINTS+0)*32+17) /* IBRSB && IBRS clear in Xen */
+XEN_CPUFEATURE(NO_XPTI,         (FSCAPINTS+0)*32+18) /* XPTI mitigation not in use */



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Reply via email to