An unintended consequence of the BSP using cpu0_stack[] is that writeable mappings to the BSPs shadow stacks are retained in the bss. This renders CET-SS almost useless, as an attacker can update both return addresses and the ret will not fault.
We specifically don't want the shatter the superpage mapping .data/.bss, so the only way to fix this is to not have the BSP stack in the main Xen image. Break cpu_alloc_stack() out of cpu_smpboot_alloc(), and dynamically allocate the BSP stack as early as reasonable in __start_xen(). As a consequence, there is no need to delay the BSP's memguard_guard_stack() call. Copy the top of cpu info block just before switching to use the new stack. Fix a latent bug by setting %rsp to info->guest_cpu_user_regs rather than ->es; this would be buggy if reinit_bsp_stack() called schedule() (which rewrites the GPR block) directly, but luckily it doesn't. Finally, move cpu0_stack[] into .init, so it can be reclaimed after boot. Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> --- CC: Jan Beulich <jbeul...@suse.com> CC: Roger Pau Monné <roger....@citrix.com> CC: Wei Liu <w...@xen.org> CC: Marek Marczykowski-Górecki <marma...@invisiblethingslab.com> --- xen/arch/x86/include/asm/smp.h | 2 ++ xen/arch/x86/setup.c | 20 +++++++++++++------- xen/arch/x86/smpboot.c | 26 +++++++++++++++++++------- xen/arch/x86/xen.lds.S | 4 ++-- 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/xen/arch/x86/include/asm/smp.h b/xen/arch/x86/include/asm/smp.h index 1747772d232e..41a3b6a0dadf 100644 --- a/xen/arch/x86/include/asm/smp.h +++ b/xen/arch/x86/include/asm/smp.h @@ -85,6 +85,8 @@ extern cpumask_t **socket_cpumask; extern unsigned int disabled_cpus; extern bool unaccounted_cpus; +void *cpu_alloc_stack(unsigned int cpu); + #endif /* !__ASSEMBLY__ */ #endif diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index 22a9885dee5c..1f816ce05a07 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -148,7 +148,7 @@ cpumask_t __read_mostly cpu_present_map; unsigned long __read_mostly xen_phys_start; -char __section(".bss.stack_aligned") __aligned(STACK_SIZE) +char __section("init.bss.stack_aligned") __aligned(STACK_SIZE) cpu0_stack[STACK_SIZE]; /* Used by the BSP/AP paths to find the higher half stack mapping to use. */ @@ -712,7 +712,6 @@ static void __init noreturn reinit_bsp_stack(void) percpu_traps_init(); stack_base[0] = stack; - memguard_guard_stack(stack); rc = setup_cpu_root_pgt(0); if ( rc ) @@ -886,6 +885,8 @@ void __init noreturn __start_xen(unsigned long mbi_p) { char *memmap_type = NULL; char *cmdline, *kextra, *loader; + void *bsp_stack; + struct cpu_info *info = get_cpu_info(), *bsp_info; unsigned int initrdidx, num_parked = 0; multiboot_info_t *mbi; module_t *mod; @@ -918,7 +919,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) /* Full exception support from here on in. */ rdmsrl(MSR_EFER, this_cpu(efer)); - asm volatile ( "mov %%cr4,%0" : "=r" (get_cpu_info()->cr4) ); + asm volatile ( "mov %%cr4,%0" : "=r" (info->cr4) ); /* Enable NMIs. Our loader (e.g. Tboot) may have left them disabled. */ enable_nmis(); @@ -1703,6 +1704,10 @@ void __init noreturn __start_xen(unsigned long mbi_p) */ vm_init(); + bsp_stack = cpu_alloc_stack(0); + if ( !bsp_stack ) + panic("No memory for BSP stack\n"); + console_init_ring(); vesa_init(); @@ -1974,17 +1979,18 @@ void __init noreturn __start_xen(unsigned long mbi_p) if ( bsp_delay_spec_ctrl ) { - struct cpu_info *info = get_cpu_info(); - info->spec_ctrl_flags &= ~SCF_use_shadow; barrier(); wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl); info->last_spec_ctrl = default_xen_spec_ctrl; } - /* Jump to the 1:1 virtual mappings of cpu0_stack. */ + /* Copy the cpu info block, and move onto the BSP stack. */ + bsp_info = get_cpu_info_from_stack((unsigned long)bsp_stack); + *bsp_info = *info; + asm volatile ("mov %[stk], %%rsp; jmp %c[fn]" :: - [stk] "g" (__va(__pa(get_stack_bottom()))), + [stk] "g" (&bsp_info->guest_cpu_user_regs), [fn] "i" (reinit_bsp_stack) : "memory"); unreachable(); } diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index 709704d71ada..b46fd9ab18e4 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -1023,6 +1023,23 @@ static void cpu_smpboot_free(unsigned int cpu, bool remove) } } +void *cpu_alloc_stack(unsigned int cpu) +{ + nodeid_t node = cpu_to_node(cpu); + unsigned int memflags = 0; + void *stack; + + if ( node != NUMA_NO_NODE ) + memflags = MEMF_node(node); + + stack = alloc_xenheap_pages(STACK_ORDER, memflags); + + if ( stack ) + memguard_guard_stack(stack); + + return stack; +} + static int cpu_smpboot_alloc(unsigned int cpu) { struct cpu_info *info; @@ -1035,15 +1052,10 @@ static int cpu_smpboot_alloc(unsigned int cpu) if ( node != NUMA_NO_NODE ) memflags = MEMF_node(node); - if ( stack_base[cpu] == NULL ) - { - stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, memflags); - if ( !stack_base[cpu] ) + if ( stack_base[cpu] == NULL && + (stack_base[cpu] = cpu_alloc_stack(cpu)) == NULL ) goto out; - memguard_guard_stack(stack_base[cpu]); - } - info = get_cpu_info_from_stack((unsigned long)stack_base[cpu]); info->processor_id = cpu; info->per_cpu_offset = __per_cpu_offset[cpu]; diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S index 4103763f6391..9cd4fe417e14 100644 --- a/xen/arch/x86/xen.lds.S +++ b/xen/arch/x86/xen.lds.S @@ -215,8 +215,9 @@ SECTIONS } PHDR(text) DECL_SECTION(.init.data) { #endif + . = ALIGN(STACK_SIZE); + *(.init.bss.stack_aligned) - . = ALIGN(POINTER_ALIGN); __initdata_cf_clobber_start = .; *(.init.data.cf_clobber) *(.init.rodata.cf_clobber) @@ -300,7 +301,6 @@ SECTIONS DECL_SECTION(.bss) { __bss_start = .; - *(.bss.stack_aligned) *(.bss.page_aligned*) . = ALIGN(PAGE_SIZE); __per_cpu_start = .; -- 2.11.0