All manipulations of PER_CPU(old_rsp) in C code are removed: it is no longer used on the SYSRET return path, so storing anything there is pointless.
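For context (not part of the patch): a minimal sketch of the C-side per-CPU bookkeeping being deleted. The helper name is made up for illustration; the old_rsp/usersp accesses mirror the lines removed from __switch_to() and start_thread_common() in the diff below, where after this change the only place C code records the user stack pointer is pt_regs->sp.

#include <linux/percpu.h>
#include <asm/processor.h>

/* Per-CPU copy of the user RSP kept for the SYSCALL fast path (now unused). */
DEFINE_PER_CPU(unsigned long, old_rsp);

/*
 * sync_user_rsp() is a hypothetical helper showing the pattern this patch
 * deletes: __switch_to() stashed the outgoing task's user RSP from the
 * per-CPU slot into thread_struct::usersp and loaded the incoming task's
 * value back, and start_thread_common() seeded both on exec.
 */
static void sync_user_rsp(struct thread_struct *prev, struct thread_struct *next)
{
	prev->usersp = this_cpu_read(old_rsp);	/* save outgoing task's user RSP */
	this_cpu_write(old_rsp, next->usersp);	/* install incoming task's user RSP */
}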
This also allows us to get rid of thread_struct::usersp, which was needed
only to set PER_CPU(old_rsp) for correct return from fork/clone.

Tweak a few comments as well: we no longer have "partial stack frame", ever.

Signed-off-by: Denys Vlasenko <dvlas...@redhat.com>
CC: Linus Torvalds <torva...@linux-foundation.org>
CC: Steven Rostedt <rost...@goodmis.org>
CC: Ingo Molnar <mi...@kernel.org>
CC: Borislav Petkov <b...@alien8.de>
CC: "H. Peter Anvin" <h...@zytor.com>
CC: Andy Lutomirski <l...@amacapital.net>
CC: Oleg Nesterov <o...@redhat.com>
CC: Frederic Weisbecker <fweis...@gmail.com>
CC: Alexei Starovoitov <a...@plumgrid.com>
CC: Will Drewry <w...@chromium.org>
CC: Kees Cook <keesc...@chromium.org>
CC: x...@kernel.org
CC: linux-kernel@vger.kernel.org
---

Run-tested, including with PARAVIRT on.

 arch/x86/include/asm/processor.h |  6 ------
 arch/x86/kernel/entry_64.S       | 14 ++++++--------
 arch/x86/kernel/process_64.c     |  5 -----
 3 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 997e6a1..66a1954 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -496,7 +496,6 @@ struct thread_struct {
 #ifdef CONFIG_X86_32
 	unsigned long sysenter_cs;
 #else
-	unsigned long usersp;	/* Copy from PDA */
 	unsigned short es;
 	unsigned short ds;
 	unsigned short fsindex;
@@ -908,11 +907,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 #define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
-/*
- * User space RSP while inside the SYSCALL fast path
- */
-DECLARE_PER_CPU(unsigned long, old_rsp);
-
 #endif /* CONFIG_X86_64 */
 
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 3054a9d..cb86db0 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -15,10 +15,8 @@
  * after an interrupt and after each system call.
  *
  * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * - iret frame: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
- * - partial stack frame: partially saved registers up to R11.
- * - full stack frame: Like partial stack frame, but all register saved.
  *
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
@@ -219,7 +217,7 @@ ENDPROC(native_usergs_sysret64)
  * Interrupts are off on entry.
  * Only called from user space.
  *
- * When user can change the frames always force IRET. That is because
+ * When user can change pt_regs->foo always force IRET. That is because
  * it deals with uncanonical addresses better. SYSRET has trouble
  * with them due to bugs in both AMD and Intel CPUs.
  */
@@ -303,7 +301,7 @@ int_ret_from_sys_call_fixup:
 	FIXUP_TOP_OF_STACK %r11
 	jmp int_ret_from_sys_call
 
-	/* Do syscall tracing */
+	/* Do syscall entry tracing */
 tracesys:
 	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
@@ -339,11 +337,11 @@ tracesys_phase2:
 	movq %r10,%rcx	/* fixup for C */
 	call *sys_call_table(,%rax,8)
 	movq %rax,RAX(%rsp)
-	/* Use IRET because user could have changed frame */
+	/* Use IRET because user could have changed pt_regs->foo */
 
 /*
  * Syscall return path ending with IRET.
- * Has correct top of stack, but partial stack frame.
+ * Has correct iret frame.
  */
 GLOBAL(int_ret_from_sys_call)
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -374,7 +372,7 @@ int_careful:
 	TRACE_IRQS_OFF
 	jmp int_with_check
 
-	/* handle signals and tracing -- both require a full stack frame */
+	/* handle signals and tracing -- both require a full pt_regs */
 int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e8c124a..14df2be 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
 	childregs = task_pt_regs(p);
 	p->thread.sp = (unsigned long) childregs;
-	p->thread.usersp = me->thread.usersp;
 	set_tsk_thread_flag(p, TIF_FORK);
 	p->thread.io_bitmap_ptr = NULL;
 
@@ -235,10 +234,8 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	loadsegment(es, _ds);
 	loadsegment(ds, _ds);
 	load_gs_index(0);
-	current->thread.usersp = new_sp;
 	regs->ip = new_ip;
 	regs->sp = new_sp;
-	this_cpu_write(old_rsp, new_sp);
 	regs->cs = _cs;
 	regs->ss = _ss;
 	regs->flags = X86_EFLAGS_IF;
@@ -398,8 +395,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
-	prev->usersp = this_cpu_read(old_rsp);
-	this_cpu_write(old_rsp, next->usersp);
 	this_cpu_write(current_task, next_p);
 
 	/*
-- 
1.8.1.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/