All manipulations of PER_CPU(old_rsp) in C code are removed:
it is not used on SYSRET return, so storing anything there is
pointless.
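
Since the SYSRET return path no longer reads old_rsp, the only copy of the
user stack pointer that matters is the one saved in pt_regs at kernel entry.
A minimal sketch of how it can be read back (user_sp_of() is a hypothetical
helper for illustration only, not part of this patch):

        #include <linux/sched.h>        /* struct task_struct, task_pt_regs() */

        /* Assumption: @task is stopped with a full pt_regs saved on its kernel stack. */
        static unsigned long user_sp_of(struct task_struct *task)
        {
                return task_pt_regs(task)->sp;  /* RSP saved at syscall/interrupt entry */
        }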

This also allows us to get rid of thread_struct::usersp,
which was needed only to set PER_CPU(old_rsp) for correct
return from fork/clone.
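
For reference, a simplified sketch of the existing copy_thread() flow in
process_64.c that already makes fork/clone return on the correct user stack;
this is a paraphrase for illustration, not code added by this patch:

        childregs  = task_pt_regs(p);
        *childregs = *current_pt_regs();   /* child inherits pt_regs->sp, the parent's saved user RSP */
        childregs->ax = 0;                 /* fork()/clone() returns 0 in the child */
        if (sp)
                childregs->sp = sp;        /* clone() with an explicit new user stack */

The saved pt_regs already carries the user RSP the child must return to, so
the extra usersp copy was redundant.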

Tweak a few comments as well: we no longer have a "partial stack frame",
ever.

Signed-off-by: Denys Vlasenko <dvlas...@redhat.com>
CC: Linus Torvalds <torva...@linux-foundation.org>
CC: Steven Rostedt <rost...@goodmis.org>
CC: Ingo Molnar <mi...@kernel.org>
CC: Borislav Petkov <b...@alien8.de>
CC: "H. Peter Anvin" <h...@zytor.com>
CC: Andy Lutomirski <l...@amacapital.net>
CC: Oleg Nesterov <o...@redhat.com>
CC: Frederic Weisbecker <fweis...@gmail.com>
CC: Alexei Starovoitov <a...@plumgrid.com>
CC: Will Drewry <w...@chromium.org>
CC: Kees Cook <keesc...@chromium.org>
CC: x...@kernel.org
CC: linux-kernel@vger.kernel.org
---

Run-tested, including with PARAVIRT on.

 arch/x86/include/asm/processor.h |  6 ------
 arch/x86/kernel/entry_64.S       | 14 ++++++--------
 arch/x86/kernel/process_64.c     |  5 -----
 3 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 997e6a1..66a1954 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -496,7 +496,6 @@ struct thread_struct {
 #ifdef CONFIG_X86_32
        unsigned long           sysenter_cs;
 #else
-       unsigned long           usersp; /* Copy from PDA */
        unsigned short          es;
        unsigned short          ds;
        unsigned short          fsindex;
@@ -908,11 +907,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 #define task_pt_regs(tsk)      ((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
-/*
- * User space RSP while inside the SYSCALL fast path
- */
-DECLARE_PER_CPU(unsigned long, old_rsp);
-
 #endif /* CONFIG_X86_64 */
 
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 3054a9d..cb86db0 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -15,10 +15,8 @@
  * after an interrupt and after each system call.
  *
  * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * - iret frame: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
- * - partial stack frame: partially saved registers up to R11.
- * - full stack frame: Like partial stack frame, but all register saved.
  *
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
@@ -219,7 +217,7 @@ ENDPROC(native_usergs_sysret64)
  * Interrupts are off on entry.
  * Only called from user space.
  *
- * When user can change the frames always force IRET. That is because
+ * When user can change pt_regs->foo always force IRET. That is because
  * it deals with uncanonical addresses better. SYSRET has trouble
  * with them due to bugs in both AMD and Intel CPUs.
  */
@@ -303,7 +301,7 @@ int_ret_from_sys_call_fixup:
        FIXUP_TOP_OF_STACK %r11
        jmp int_ret_from_sys_call
 
-       /* Do syscall tracing */
+       /* Do syscall entry tracing */
 tracesys:
        movq %rsp, %rdi
        movq $AUDIT_ARCH_X86_64, %rsi
@@ -339,11 +337,11 @@ tracesys_phase2:
        movq %r10,%rcx  /* fixup for C */
        call *sys_call_table(,%rax,8)
        movq %rax,RAX(%rsp)
-       /* Use IRET because user could have changed frame */
+       /* Use IRET because user could have changed pt_regs->foo */
 
 /*
  * Syscall return path ending with IRET.
- * Has correct top of stack, but partial stack frame.
+ * Has correct iret frame.
  */
 GLOBAL(int_ret_from_sys_call)
        DISABLE_INTERRUPTS(CLBR_NONE)
@@ -374,7 +372,7 @@ int_careful:
        TRACE_IRQS_OFF
        jmp int_with_check
 
-       /* handle signals and tracing -- both require a full stack frame */
+       /* handle signals and tracing -- both require a full pt_regs */
 int_very_careful:
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e8c124a..14df2be 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
        p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
        childregs = task_pt_regs(p);
        p->thread.sp = (unsigned long) childregs;
-       p->thread.usersp = me->thread.usersp;
        set_tsk_thread_flag(p, TIF_FORK);
        p->thread.io_bitmap_ptr = NULL;
 
@@ -235,10 +234,8 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
        loadsegment(es, _ds);
        loadsegment(ds, _ds);
        load_gs_index(0);
-       current->thread.usersp  = new_sp;
        regs->ip                = new_ip;
        regs->sp                = new_sp;
-       this_cpu_write(old_rsp, new_sp);
        regs->cs                = _cs;
        regs->ss                = _ss;
        regs->flags             = X86_EFLAGS_IF;
@@ -398,8 +395,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        /*
         * Switch the PDA and FPU contexts.
         */
-       prev->usersp = this_cpu_read(old_rsp);
-       this_cpu_write(old_rsp, next->usersp);
        this_cpu_write(current_task, next_p);
 
        /*
-- 
1.8.1.4
