An unintended consequence of the BSP using cpu0_stack[] is that writeable
mappings to the BSPs shadow stacks are retained in the bss.  This renders
CET-SS almost useless, as an attacker can update both return addresses and the
ret will not fault.

We specifically don't want the shatter the superpage mapping .data/.bss, so
the only way to fix this is to not have the BSP stack in the main Xen image.

Break cpu_alloc_stack() out of cpu_smpboot_alloc(), and dynamically allocate
the BSP stack as early as reasonable in __start_xen().  As a consequence,
there is no need to delay the BSP's memguard_guard_stack() call.

Copy the top of cpu info block just before switching to use the new stack.
Fix a latent bug by setting %rsp to info->guest_cpu_user_regs rather than
->es; this would be buggy if reinit_bsp_stack() called schedule() (which
rewrites the GPR block) directly, but luckily it doesn't.

Finally, move cpu0_stack[] into .init, so it can be reclaimed after boot.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
---
CC: Jan Beulich <jbeul...@suse.com>
CC: Roger Pau Monné <roger....@citrix.com>
CC: Wei Liu <w...@xen.org>
CC: Marek Marczykowski-Górecki <marma...@invisiblethingslab.com>
---
 xen/arch/x86/include/asm/smp.h |  2 ++
 xen/arch/x86/setup.c           | 20 +++++++++++++-------
 xen/arch/x86/smpboot.c         | 26 +++++++++++++++++++-------
 xen/arch/x86/xen.lds.S         |  4 ++--
 4 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/include/asm/smp.h b/xen/arch/x86/include/asm/smp.h
index 1747772d232e..41a3b6a0dadf 100644
--- a/xen/arch/x86/include/asm/smp.h
+++ b/xen/arch/x86/include/asm/smp.h
@@ -85,6 +85,8 @@ extern cpumask_t **socket_cpumask;
 extern unsigned int disabled_cpus;
 extern bool unaccounted_cpus;
 
+void *cpu_alloc_stack(unsigned int cpu);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 22a9885dee5c..1f816ce05a07 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -148,7 +148,7 @@ cpumask_t __read_mostly cpu_present_map;
 
 unsigned long __read_mostly xen_phys_start;
 
-char __section(".bss.stack_aligned") __aligned(STACK_SIZE)
+char __section("init.bss.stack_aligned") __aligned(STACK_SIZE)
     cpu0_stack[STACK_SIZE];
 
 /* Used by the BSP/AP paths to find the higher half stack mapping to use. */
@@ -712,7 +712,6 @@ static void __init noreturn reinit_bsp_stack(void)
     percpu_traps_init();
 
     stack_base[0] = stack;
-    memguard_guard_stack(stack);
 
     rc = setup_cpu_root_pgt(0);
     if ( rc )
@@ -886,6 +885,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 {
     char *memmap_type = NULL;
     char *cmdline, *kextra, *loader;
+    void *bsp_stack;
+    struct cpu_info *info = get_cpu_info(), *bsp_info;
     unsigned int initrdidx, num_parked = 0;
     multiboot_info_t *mbi;
     module_t *mod;
@@ -918,7 +919,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     /* Full exception support from here on in. */
 
     rdmsrl(MSR_EFER, this_cpu(efer));
-    asm volatile ( "mov %%cr4,%0" : "=r" (get_cpu_info()->cr4) );
+    asm volatile ( "mov %%cr4,%0" : "=r" (info->cr4) );
 
     /* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
     enable_nmis();
@@ -1703,6 +1704,10 @@ void __init noreturn __start_xen(unsigned long mbi_p)
      */
     vm_init();
 
+    bsp_stack = cpu_alloc_stack(0);
+    if ( !bsp_stack )
+        panic("No memory for BSP stack\n");
+
     console_init_ring();
     vesa_init();
 
@@ -1974,17 +1979,18 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 
     if ( bsp_delay_spec_ctrl )
     {
-        struct cpu_info *info = get_cpu_info();
-
         info->spec_ctrl_flags &= ~SCF_use_shadow;
         barrier();
         wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl);
         info->last_spec_ctrl = default_xen_spec_ctrl;
     }
 
-    /* Jump to the 1:1 virtual mappings of cpu0_stack. */
+    /* Copy the cpu info block, and move onto the BSP stack. */
+    bsp_info = get_cpu_info_from_stack((unsigned long)bsp_stack);
+    *bsp_info = *info;
+
     asm volatile ("mov %[stk], %%rsp; jmp %c[fn]" ::
-                  [stk] "g" (__va(__pa(get_stack_bottom()))),
+                  [stk] "g" (&bsp_info->guest_cpu_user_regs),
                   [fn] "i" (reinit_bsp_stack) : "memory");
     unreachable();
 }
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 709704d71ada..b46fd9ab18e4 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -1023,6 +1023,23 @@ static void cpu_smpboot_free(unsigned int cpu, bool 
remove)
     }
 }
 
+void *cpu_alloc_stack(unsigned int cpu)
+{
+    nodeid_t node = cpu_to_node(cpu);
+    unsigned int memflags = 0;
+    void *stack;
+
+    if ( node != NUMA_NO_NODE )
+        memflags = MEMF_node(node);
+
+    stack = alloc_xenheap_pages(STACK_ORDER, memflags);
+
+    if ( stack )
+        memguard_guard_stack(stack);
+
+    return stack;
+}
+
 static int cpu_smpboot_alloc(unsigned int cpu)
 {
     struct cpu_info *info;
@@ -1035,15 +1052,10 @@ static int cpu_smpboot_alloc(unsigned int cpu)
     if ( node != NUMA_NO_NODE )
         memflags = MEMF_node(node);
 
-    if ( stack_base[cpu] == NULL )
-    {
-        stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, memflags);
-        if ( !stack_base[cpu] )
+    if ( stack_base[cpu] == NULL &&
+         (stack_base[cpu] = cpu_alloc_stack(cpu)) == NULL )
             goto out;
 
-        memguard_guard_stack(stack_base[cpu]);
-    }
-
     info = get_cpu_info_from_stack((unsigned long)stack_base[cpu]);
     info->processor_id = cpu;
     info->per_cpu_offset = __per_cpu_offset[cpu];
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 4103763f6391..9cd4fe417e14 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -215,8 +215,9 @@ SECTIONS
   } PHDR(text)
   DECL_SECTION(.init.data) {
 #endif
+       . = ALIGN(STACK_SIZE);
+       *(.init.bss.stack_aligned)
 
-       . = ALIGN(POINTER_ALIGN);
        __initdata_cf_clobber_start = .;
        *(.init.data.cf_clobber)
        *(.init.rodata.cf_clobber)
@@ -300,7 +301,6 @@ SECTIONS
 
   DECL_SECTION(.bss) {
        __bss_start = .;
-       *(.bss.stack_aligned)
        *(.bss.page_aligned*)
        . = ALIGN(PAGE_SIZE);
        __per_cpu_start = .;
-- 
2.11.0


Reply via email to