After an interrupt/exception in userland, the kernel is entered and it switches the stack to the PTI stack which is mapped both in the kernel and in the user page-table. When executing the interrupt function, switch to the kernel stack (which is mapped only in the kernel page-table) so that no kernel data leaks to userland through the stack.
For now, only changes IDT handlers which have no argument other than the pt_regs registers. Signed-off-by: Alexandre Chartre <alexandre.char...@oracle.com> --- arch/x86/include/asm/idtentry.h | 43 +++++++++++++++++++++++++++++++-- arch/x86/kernel/cpu/mce/core.c | 2 +- arch/x86/kernel/traps.c | 4 +-- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 4b4aca2b1420..3595a31947b3 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -10,10 +10,49 @@ #include <linux/hardirq.h> #include <asm/irq_stack.h> +#include <asm/pti.h> bool idtentry_enter_nmi(struct pt_regs *regs); void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state); +/* + * The CALL_ON_STACK_* macros call the specified function either directly + * if no stack is provided, or on the specified stack. + */ +#define CALL_ON_STACK_1(stack, func, arg1) \ + ((stack) ? \ + asm_call_on_stack_1(stack, \ + (void (*)(void))(func), (void *)(arg1)) : \ + func(arg1)) + +/* + * Functions to return the top of the kernel stack if we are using the + * user page-table (and thus not running with the kernel stack). If we + * are using the kernel page-table (and so already using the kernel + * stack) then they return NULL. + */ +static __always_inline void *pti_kernel_stack(struct pt_regs *regs) +{ + unsigned long stack; + + if (pti_enabled() && user_mode(regs)) { + stack = (unsigned long)task_top_of_kernel_stack(current); + return (void *)(stack - 8); + } else { + return NULL; + } +} + +/* + * Wrappers to run an IDT handler on the kernel stack if we are not + * already using this stack. 
+ */ +static __always_inline +void run_idt(void (*func)(struct pt_regs *), struct pt_regs *regs) +{ + CALL_ON_STACK_1(pti_kernel_stack(regs), func, regs); +} + /** * DECLARE_IDTENTRY - Declare functions for simple IDT entry points * No error code pushed by hardware @@ -55,7 +94,7 @@ __visible noinstr void func(struct pt_regs *regs) \ irqentry_state_t state = irqentry_enter(regs); \ \ instrumentation_begin(); \ - __##func (regs); \ + run_idt(__##func, regs); \ instrumentation_end(); \ irqentry_exit(regs, state); \ } \ @@ -271,7 +310,7 @@ __visible noinstr void func(struct pt_regs *regs) \ instrumentation_begin(); \ __irq_enter_raw(); \ kvm_set_cpu_l1tf_flush_l1d(); \ - __##func (regs); \ + run_idt(__##func, regs); \ __irq_exit_raw(); \ instrumentation_end(); \ irqentry_exit(regs, state); \ diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 4102b866e7c0..9407c3cd9355 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2035,7 +2035,7 @@ DEFINE_IDTENTRY_MCE_USER(exc_machine_check) unsigned long dr7; dr7 = local_db_save(); - exc_machine_check_user(regs); + run_idt(exc_machine_check_user, regs); local_db_restore(dr7); } #else diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 09b22a611d99..5161385b3670 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -257,7 +257,7 @@ DEFINE_IDTENTRY_RAW(exc_invalid_op) state = irqentry_enter(regs); instrumentation_begin(); - handle_invalid_op(regs); + run_idt(handle_invalid_op, regs); instrumentation_end(); irqentry_exit(regs, state); } @@ -647,7 +647,7 @@ DEFINE_IDTENTRY_RAW(exc_int3) if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); instrumentation_begin(); - do_int3_user(regs); + run_idt(do_int3_user, regs); instrumentation_end(); irqentry_exit_to_user_mode(regs); } else { -- 2.18.4