On Mon, Nov 27, 2017 at 10:14:24AM +0100, Peter Zijlstra wrote:

> But if we can freely spill here, should we not do the kernel switch
> instead of doing this user mapping? The way I understand things, the
> less of these magic mappings we have the better.

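Turns out we don't need any more scratch regs at all: user RDI is
already stashed on the stack at this point, so %rdi can serve as the
scratch register for both CR3 switches if we delay restoring it until
after the switch back to the user CR3.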

The patch below seems to survive the sigreturn_64 selftest
(tools/testing/selftests/x86/sigreturn_64), which exercises the ESPFIX
crud.

---
 arch/x86/entry/entry_64.S   | 11 ++++++++---
 arch/x86/kernel/espfix_64.c | 10 ++--------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index df0152bee8a8..289ba2680952 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -825,7 +825,9 @@ ENTRY(native_iret)
         */
 
        pushq   %rdi                            /* Stash user RDI */
-       SWAPGS
+       SWAPGS                                  /* to kernel GS */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi   /* to kernel CR3 */
+
        movq    PER_CPU_VAR(espfix_waddr), %rdi
        movq    %rax, (0*8)(%rdi)               /* user RAX */
        movq    (1*8)(%rsp), %rax               /* user RIP */
@@ -841,7 +843,6 @@ ENTRY(native_iret)
        /* Now RAX == RSP. */
 
        andl    $0xffff0000, %eax               /* RAX = (RSP & 0xffff0000) */
-       popq    %rdi                            /* Restore user RDI */
 
        /*
         * espfix_stack[31:16] == 0.  The page tables are set up such that
@@ -852,7 +853,11 @@ ENTRY(native_iret)
         * still points to an RO alias of the ESPFIX stack.
         */
        orq     PER_CPU_VAR(espfix_stack), %rax
-       SWAPGS
+
+       SWITCH_TO_USER_CR3 scratch_reg=%rdi     /* to user CR3 */
+       SWAPGS                                  /* to user GS */
+       popq    %rdi                            /* Restore user RDI */
+
        movq    %rax, %rsp
        UNWIND_HINT_IRET_REGS offset=8
 
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 8bb116d73aaa..8826475d786c 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -61,8 +61,8 @@
 #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
 
 /* This contains the *bottom* address of the espfix stack */
-DEFINE_PER_CPU_USER_MAPPED(unsigned long, espfix_stack);
-DEFINE_PER_CPU_USER_MAPPED(unsigned long, espfix_waddr);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 /* Initialization mutex - should this be a spinlock? */
 static DEFINE_MUTEX(espfix_init_mutex);
@@ -225,10 +225,4 @@ void init_espfix_ap(int cpu)
        per_cpu(espfix_stack, cpu) = addr;
        per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
                                      + (addr & ~PAGE_MASK);
-       /*
-        * _PAGE_GLOBAL is not really required.  This is not a hot
-        * path, but we do it here for consistency.
-        */
-       kaiser_add_mapping((unsigned long)stack_page, PAGE_SIZE,
-                       __PAGE_KERNEL | _PAGE_GLOBAL);
 }

Reply via email to