On 64-bit kernels, we used to assume that TSS.sp0 was the current
top of stack.  With the addition of an entry trampoline, this will
no longer be the case.  Store the current top of stack in TSS.sp1,
which is otherwise unused but shares the same cacheline.

Reviewed-by: Thomas Gleixner <t...@linutronix.de>
Signed-off-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/include/asm/processor.h   | 18 +++++++++++++-----
 arch/x86/include/asm/thread_info.h |  2 +-
 arch/x86/kernel/asm-offsets_64.c   |  1 +
 arch/x86/kernel/process.c          | 10 ++++++++++
 arch/x86/kernel/process_64.c       |  1 +
 5 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 48d44fae3d27..3a09e5571a92 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -305,7 +305,13 @@ struct x86_hw_tss {
 struct x86_hw_tss {
        u32                     reserved1;
        u64                     sp0;
+
+       /*
+        * We store cpu_current_top_of_stack in sp1 so it's always accessible.
+        * Linux does not use ring 1, so sp1 is not otherwise needed.
+        */
        u64                     sp1;
+
        u64                     sp2;
        u64                     reserved2;
        u64                     ist[7];
@@ -364,6 +370,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
 
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#else
+#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1
 #endif
 
 /*
@@ -535,12 +543,12 @@ static inline void native_swapgs(void)
 
 static inline unsigned long current_top_of_stack(void)
 {
-#ifdef CONFIG_X86_64
-       return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
-#else
-       /* sp0 on x86_32 is special in and around vm86 mode. */
+       /*
+        *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
+        *  and around vm86 mode and sp0 on x86_64 is special because of the
+        *  entry trampoline.
+        */
        return this_cpu_read_stable(cpu_current_top_of_stack);
-#endif
 }
 
 static inline bool on_thread_stack(void)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 70f425947dc5..44a04999791e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
 #else /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+# define cpu_current_top_of_stack (cpu_tss + TSS_sp1)
 #endif
 
 #endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 630212fa9b9d..ad649a8a74a0 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -63,6 +63,7 @@ int main(void)
 
        OFFSET(TSS_ist, tss_struct, x86_tss.ist);
        OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
+       OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
        BLANK();
 
 #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 35d674157fda..86e83762e3b3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
                 * Poison it.
                 */
                .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
+
+#ifdef CONFIG_X86_64
+               /*
+                * .sp1 is cpu_current_top_of_stack.  The init task never
+                * runs user code, but cpu_current_top_of_stack should still
+                * be well defined before the first context switch.
+                */
+               .sp1 = TOP_OF_INIT_STACK,
+#endif
+
 #ifdef CONFIG_X86_32
                .ss0 = __KERNEL_DS,
                .ss1 = __KERNEL_CS,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eeeb34f85c25..bafe65b08697 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -462,6 +462,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * Switch the PDA and FPU contexts.
         */
        this_cpu_write(current_task, next_p);
+       this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
 
        /* Reload sp0. */
        update_sp0(next_p);
-- 
2.13.6

Reply via email to