64-bit syscalls currently have an optimization in which they are
called with partial pt_regs.  A small handful require full pt_regs.

In the 32-bit and compat cases, I cleaned this up by forcing full
pt_regs for all syscalls.  The performance hit doesn't really matter.

I want to clean up the 64-bit case as well, but I don't want to hurt
fast path performance.  To do that, I want to force the syscalls
that use pt_regs onto the slow path.  This will enable us to make
slow path syscalls be real ABI-compliant C functions.

Use the new syscall entry qualification machinery for this.
stub_clone is now stub_clone/ptregs.

The next patch will eliminate the stubs, and we'll just have
sys_clone/ptregs.

Signed-off-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/entry/entry_64.S              | 17 +++++++++--------
 arch/x86/entry/syscall_64.c            | 18 ++++++++++++++++++
 arch/x86/entry/syscalls/syscall_64.tbl | 16 ++++++++--------
 3 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9d34d3cfceb6..a698b8092831 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -182,7 +182,7 @@ entry_SYSCALL_64_fastpath:
 #endif
        ja      1f                              /* return -ENOSYS (already in pt_regs->ax) */
        movq    %r10, %rcx
-       call    *sys_call_table(, %rax, 8)
+       call    *sys_call_table_fastpath_64(, %rax, 8)
        movq    %rax, RAX(%rsp)
 1:
 /*
@@ -238,13 +238,6 @@ tracesys:
        movq    %rsp, %rdi
        movl    $AUDIT_ARCH_X86_64, %esi
        call    syscall_trace_enter_phase1
-       test    %rax, %rax
-       jnz     tracesys_phase2                 /* if needed, run the slow path */
-       RESTORE_C_REGS_EXCEPT_RAX               /* else restore clobbered regs */
-       movq    ORIG_RAX(%rsp), %rax
-       jmp     entry_SYSCALL_64_fastpath       /* and return to the fast path */
-
-tracesys_phase2:
        SAVE_EXTRA_REGS
        movq    %rsp, %rdi
        movl    $AUDIT_ARCH_X86_64, %esi
@@ -355,6 +348,14 @@ opportunistic_sysret_failed:
        jmp     restore_c_regs_and_iret
 END(entry_SYSCALL_64)
 
+ENTRY(stub_ptregs_64)
+       /*
+        * Syscalls marked as needing ptregs that go through the fast path
+        * land here.  We transfer to the slow path.
+        */
+       addq    $8, %rsp
+       jmp     tracesys
+END(stub_ptregs_64)
 
        .macro FORK_LIKE func
 ENTRY(stub_\func)
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index a1d408772ae6..601745c667ce 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -22,3 +22,21 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
        [0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
+
+#undef __SYSCALL_64
+
+extern long stub_ptregs_64(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+
+#define __SYSCALL_64_QUAL_(nr, sym) [nr] = sym,
+#define __SYSCALL_64_QUAL_ptregs(nr, sym) [nr] = stub_ptregs_64,
+
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(nr, sym)
+
+asmlinkage const sys_call_ptr_t sys_call_table_fastpath_64[__NR_syscall_max+1] = {
+       /*
+        * Smells like a compiler bug -- it doesn't work
+        * when the & below is removed.
+        */
+       [0 ... __NR_syscall_max] = &sys_ni_syscall,
+#include <asm/syscalls_64.h>
+};
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 278842fdf1f6..6b9db2e338f4 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -21,7 +21,7 @@
 12     common  brk                     sys_brk
 13     64      rt_sigaction            sys_rt_sigaction
 14     common  rt_sigprocmask          sys_rt_sigprocmask
-15     64      rt_sigreturn            stub_rt_sigreturn
+15     64      rt_sigreturn            stub_rt_sigreturn/ptregs
 16     64      ioctl                   sys_ioctl
 17     common  pread64                 sys_pread64
 18     common  pwrite64                sys_pwrite64
@@ -62,10 +62,10 @@
 53     common  socketpair              sys_socketpair
 54     64      setsockopt              sys_setsockopt
 55     64      getsockopt              sys_getsockopt
-56     common  clone                   stub_clone
-57     common  fork                    stub_fork
-58     common  vfork                   stub_vfork
-59     64      execve                  stub_execve
+56     common  clone                   stub_clone/ptregs
+57     common  fork                    stub_fork/ptregs
+58     common  vfork                   stub_vfork/ptregs
+59     64      execve                  stub_execve/ptregs
 60     common  exit                    sys_exit
 61     common  wait4                   sys_wait4
 62     common  kill                    sys_kill
@@ -328,7 +328,7 @@
 319    common  memfd_create            sys_memfd_create
 320    common  kexec_file_load         sys_kexec_file_load
 321    common  bpf                     sys_bpf
-322    64      execveat                stub_execveat
+322    64      execveat                stub_execveat/ptregs
 323    common  userfaultfd             sys_userfaultfd
 324    common  membarrier              sys_membarrier
 
@@ -344,7 +344,7 @@
 517    x32     recvfrom                compat_sys_recvfrom
 518    x32     sendmsg                 compat_sys_sendmsg
 519    x32     recvmsg                 compat_sys_recvmsg
-520    x32     execve                  stub_x32_execve
+520    x32     execve                  stub_x32_execve/ptregs
 521    x32     ptrace                  compat_sys_ptrace
 522    x32     rt_sigpending           compat_sys_rt_sigpending
 523    x32     rt_sigtimedwait         compat_sys_rt_sigtimedwait
@@ -369,4 +369,4 @@
 542    x32     getsockopt              compat_sys_getsockopt
 543    x32     io_setup                compat_sys_io_setup
 544    x32     io_submit               compat_sys_io_submit
-545    x32     execveat                stub_x32_execveat
+545    x32     execveat                stub_x32_execveat/ptregs
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to