ret_from_fork is written in asm, slightly differently, for x86_32 and
x86_64.  Convert it to C.

This is a straight conversion without any particular cleverness.  As a
further cleanup, the code that sets up the ret_from_fork argument registers
could be adjusted to put the arguments in the correct registers.

This will cause the ORC unwinder to find pt_regs even for kernel threads on
x86_64.  This seems harmless.

The 32-bit comment above the now-deleted schedule_tail_wrapper was
obsolete: the encode_frame_pointer mechanism (see copy_thread()) solves the
same problem more cleanly.

Cc: Josh Poimboeuf <jpoim...@redhat.com>
Signed-off-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/entry/common.c          | 23 ++++++++++++++
 arch/x86/entry/entry_32.S        | 51 +++++---------------------------
 arch/x86/entry/entry_64.S        | 33 +++++----------------
 arch/x86/include/asm/switch_to.h |  2 +-
 arch/x86/kernel/process.c        |  2 +-
 arch/x86/kernel/process_32.c     |  2 +-
 arch/x86/kernel/unwind_orc.c     |  2 +-
 7 files changed, 43 insertions(+), 72 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 95776f16c1cb..ef1c65938a6b 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -214,6 +214,29 @@ SYSCALL_DEFINE0(ni_syscall)
        return -ENOSYS;
 }
 
+void ret_from_fork(struct task_struct *prev,
+                  int (*kernel_thread_fn)(void *),
+                  void *kernel_thread_arg,
+                  struct pt_regs *user_regs);
+
+__visible void noinstr ret_from_fork(struct task_struct *prev,
+                                    int (*kernel_thread_fn)(void *),
+                                    void *kernel_thread_arg,
+                                    struct pt_regs *user_regs)
+{
+       instrumentation_begin();
+
+       schedule_tail(prev);
+
+       if (kernel_thread_fn) {
+               kernel_thread_fn(kernel_thread_arg);
+               user_regs->ax = 0;
+       }
+
+       instrumentation_end();
+       syscall_exit_to_user_mode(user_regs);
+}
+
 #ifdef CONFIG_XEN_PV
 #ifndef CONFIG_PREEMPTION
 /*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017e6161..7113d259727f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -805,26 +805,6 @@ SYM_CODE_START(__switch_to_asm)
 SYM_CODE_END(__switch_to_asm)
 .popsection
 
-/*
- * The unwinder expects the last frame on the stack to always be at the same
- * offset from the end of the page, which allows it to validate the stack.
- * Calling schedule_tail() directly would break that convention because its an
- * asmlinkage function so its argument has to be pushed on the stack.  This
- * wrapper creates a proper "end of stack" frame header before the call.
- */
-.pushsection .text, "ax"
-SYM_FUNC_START(schedule_tail_wrapper)
-       FRAME_BEGIN
-
-       pushl   %eax
-       call    schedule_tail
-       popl    %eax
-
-       FRAME_END
-       ret
-SYM_FUNC_END(schedule_tail_wrapper)
-.popsection
-
 /*
  * A newly forked process directly context switches into this address.
  *
@@ -833,29 +813,14 @@ SYM_FUNC_END(schedule_tail_wrapper)
  * edi: kernel thread arg
  */
 .pushsection .text, "ax"
-SYM_CODE_START(ret_from_fork)
-       call    schedule_tail_wrapper
-
-       testl   %ebx, %ebx
-       jnz     1f              /* kernel threads are uncommon */
-
-2:
-       /* When we fork, we trace the syscall return in the child, too. */
-       movl    %esp, %eax
-       call    syscall_exit_to_user_mode
-       jmp     .Lsyscall_32_done
-
-       /* kernel thread */
-1:     movl    %edi, %eax
-       CALL_NOSPEC ebx
-       /*
-        * A kernel thread is allowed to return here after successfully
-        * calling kernel_execve().  Exit to userspace to complete the execve()
-        * syscall.
-        */
-       movl    $0, PT_EAX(%esp)
-       jmp     2b
-SYM_CODE_END(ret_from_fork)
+SYM_CODE_START(asm_ret_from_fork)
+       movl    %ebx, %edx
+       movl    %edi, %ecx
+       pushl   %esp
+       call    ret_from_fork
+       addl    $4, %esp
+       jmp     .Lsyscall_32_done
+SYM_CODE_END(asm_ret_from_fork)
 .popsection
 
 SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index cad08703c4ad..0f7df8861ac1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -273,35 +273,18 @@ SYM_FUNC_END(__switch_to_asm)
  * rax: prev task we switched from
  * rbx: kernel thread func (NULL for user thread)
  * r12: kernel thread arg
+ * rbp: encoded frame pointer for the fp unwinder
  */
 .pushsection .text, "ax"
-SYM_CODE_START(ret_from_fork)
-       UNWIND_HINT_EMPTY
-       movq    %rax, %rdi
-       call    schedule_tail                   /* rdi: 'prev' task parameter */
-
-       testq   %rbx, %rbx                      /* from kernel_thread? */
-       jnz     1f                              /* kernel threads are uncommon 
*/
-
-2:
+SYM_CODE_START(asm_ret_from_fork)
        UNWIND_HINT_REGS
-       movq    %rsp, %rdi
-       call    syscall_exit_to_user_mode       /* returns with IRQs disabled */
+       movq    %rax, %rdi
+       movq    %rbx, %rsi
+       movq    %r12, %rdx
+       movq    %rsp, %rcx
+       call    ret_from_fork
        jmp     swapgs_restore_regs_and_return_to_usermode
-
-1:
-       /* kernel thread */
-       UNWIND_HINT_EMPTY
-       movq    %r12, %rdi
-       CALL_NOSPEC rbx
-       /*
-        * A kernel thread is allowed to return here after successfully
-        * calling kernel_execve().  Exit to userspace to complete the execve()
-        * syscall.
-        */
-       movq    $0, RAX(%rsp)
-       jmp     2b
-SYM_CODE_END(ret_from_fork)
+SYM_CODE_END(asm_ret_from_fork)
 .popsection
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 9f69cc497f4b..fcb9b02a1269 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -12,7 +12,7 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
 __visible struct task_struct *__switch_to(struct task_struct *prev,
                                          struct task_struct *next);
 
-asmlinkage void ret_from_fork(void);
+asmlinkage void asm_ret_from_fork(void);
 
 /*
  * This is the structure pointed to by thread.sp for an inactive task.  The
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index f6f16df04cb9..34efbca08738 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -135,7 +135,7 @@ int copy_thread(unsigned long clone_flags, unsigned long 
sp, unsigned long arg,
        frame = &fork_frame->frame;
 
        frame->bp = encode_frame_pointer(childregs);
-       frame->ret_addr = (unsigned long) ret_from_fork;
+       frame->ret_addr = (unsigned long) asm_ret_from_fork;
        p->thread.sp = (unsigned long) fork_frame;
        p->thread.io_bitmap = NULL;
        memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4f2f54e1281c..bf8aa15ac652 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -151,7 +151,7 @@ EXPORT_SYMBOL_GPL(start_thread);
  * more flexibility.
  *
  * The return value (in %ax) will be the "prev" task after
- * the task-switch, and shows up in ret_from_fork in entry.S,
+ * the task-switch, and shows up in asm_ret_from_fork in entry_32.S,
  * for example.
  */
 __visible __notrace_funcgraph struct task_struct *
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 73f800100066..c6e7235c6d9f 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -659,7 +659,7 @@ void __unwind_start(struct unwind_state *state, struct 
task_struct *task,
                state->sp = task->thread.sp + sizeof(*frame);
                state->bp = READ_ONCE_NOCHECK(frame->bp);
                state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
-               state->signal = (void *)state->ip == ret_from_fork;
+               state->signal = (void *)state->ip == asm_ret_from_fork;
        }
 
        if (get_stack_info((unsigned long *)state->sp, state->task,
-- 
2.29.2

Reply via email to