The ESPFIX stack is filled using assembly code. Move this code to a C
function so that it is easier to read and modify.

Signed-off-by: Alexandre Chartre <alexandre.char...@oracle.com>
---
 arch/x86/entry/entry_64.S   | 62 ++++++++++++++++++-------------------
 arch/x86/kernel/espfix_64.c | 41 ++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 31 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 73e9cd47dc83..6e0b5b010e0b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -684,8 +684,10 @@ native_irq_return_ldt:
         * long (see ESPFIX_STACK_SIZE).  espfix_waddr points to the bottom
         * of the ESPFIX stack.
         *
-        * We clobber RAX and RDI in this code.  We stash RDI on the
-        * normal stack and RAX on the ESPFIX stack.
+        * We call into C code to fill the ESPFIX stack. We stash registers
+        * that the C function can clobber on the normal stack. The user RAX
+        * is stashed first so that it is adjacent to the iret frame which
+        * will be copied to the ESPFIX stack.
         *
         * The ESPFIX stack layout we set up looks like this:
         *
@@ -699,39 +701,37 @@ native_irq_return_ldt:
         * --- bottom of ESPFIX stack ---
         */
 
-       pushq   %rdi                            /* Stash user RDI */
-       SWAPGS                                  /* to kernel GS */
-       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi   /* to kernel CR3 */
-
-       movq    PER_CPU_VAR(espfix_waddr), %rdi
-       movq    %rax, (0*8)(%rdi)               /* user RAX */
-       movq    (1*8)(%rsp), %rax               /* user RIP */
-       movq    %rax, (1*8)(%rdi)
-       movq    (2*8)(%rsp), %rax               /* user CS */
-       movq    %rax, (2*8)(%rdi)
-       movq    (3*8)(%rsp), %rax               /* user RFLAGS */
-       movq    %rax, (3*8)(%rdi)
-       movq    (5*8)(%rsp), %rax               /* user SS */
-       movq    %rax, (5*8)(%rdi)
-       movq    (4*8)(%rsp), %rax               /* user RSP */
-       movq    %rax, (4*8)(%rdi)
-       /* Now RAX == RSP. */
-
-       andl    $0xffff0000, %eax               /* RAX = (RSP & 0xffff0000) */
+       /* Save the caller-saved registers; user RAX first (see above) */
+       pushq   %rax
+       pushq   %rdi
+       pushq   %rsi
+       pushq   %rdx
+       pushq   %rcx
+       pushq   %r8
+       pushq   %r9
+       pushq   %r10
+       pushq   %r11
 
        /*
-        * espfix_stack[31:16] == 0.  The page tables are set up such that
-        * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
-        * espfix_waddr for any X.  That is, there are 65536 RO aliases of
-        * the same page.  Set up RSP so that RSP[31:16] contains the
-        * respective 16 bits of the /userspace/ RSP and RSP nonetheless
-        * still points to an RO alias of the ESPFIX stack.
+        * fill_espfix_stack will copy the iret+rax frame to the ESPFIX
+        * stack and return with RAX containing a pointer to the ESPFIX
+        * stack.
         */
-       orq     PER_CPU_VAR(espfix_stack), %rax
+       leaq    8*8(%rsp), %rdi         /* points to the iret+rax frame */
+       call    fill_espfix_stack
 
-       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
-       SWAPGS                                  /* to user GS */
-       popq    %rdi                            /* Restore user RDI */
+       /*
+        * RAX now holds a pointer to the ESPFIX stack, so restore all
+        * registers except RAX; it will be restored from the ESPFIX stack.
+        */
+       popq    %r11
+       popq    %r10
+       popq    %r9
+       popq    %r8
+       popq    %rcx
+       popq    %rdx
+       popq    %rsi
+       popq    %rdi
 
        movq    %rax, %rsp
        UNWIND_HINT_IRET_REGS offset=8
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 4fe7af58cfe1..ff4b5160b39c 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -33,6 +33,7 @@
 #include <asm/pgalloc.h>
 #include <asm/setup.h>
 #include <asm/espfix.h>
+#include <asm/entry-common.h>
 
 /*
  * Note: we only need 6*8 = 48 bytes for the espfix stack, but round
@@ -205,3 +206,43 @@ void init_espfix_ap(int cpu)
        per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
                                      + (addr & ~PAGE_MASK);
 }
+
+/*
+ * Hardware iret frame preceded by the saved user RAX register.
+ */
+struct iret_rax_frame {
+       unsigned long user_rax; /* saved user RAX; first so it sits just below the iret frame */
+       unsigned long rip;      /* hardware iret frame begins here */
+       unsigned long cs;
+       unsigned long rflags;
+       unsigned long rsp;      /* userspace RSP */
+       unsigned long ss;
+};
+
+noinstr unsigned long fill_espfix_stack(struct iret_rax_frame *frame)
+{
+       struct iret_rax_frame *espfix_frame;
+       unsigned long rsp;
+
+       native_swapgs();
+       user_pagetable_exit();
+
+       espfix_frame = (struct iret_rax_frame *)this_cpu_read(espfix_waddr);
+       *espfix_frame = *frame;
+
+       /*
+        * espfix_stack[31:16] == 0.  The page tables are set up such that
+        * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
+        * espfix_waddr for any X.  That is, there are 65536 RO aliases of
+        * the same page.  Set up RSP so that RSP[31:16] contains the
+        * respective 16 bits of the /userspace/ RSP and RSP nonetheless
+        * still points to an RO alias of the ESPFIX stack.
+        */
+       rsp = ((unsigned long)frame->rsp) & 0xffff0000;
+       rsp |= this_cpu_read(espfix_stack);
+
+       user_pagetable_enter();
+       native_swapgs();
+
+       return rsp;
+}
-- 
2.18.4

Reply via email to