From: Paul Gortmaker <pa...@kernel.org>

This reverts commit 4591766ff6552339fbaa3d3c71814faef1988c2f.

The reverted commit (3/5) is part of a fix from the v6.6.7 content.
However, during repeated boot testing on qemu-x86 (32-bit and 64-bit)
with NFS root, the boot hangs during the verbose printk of the PCI
register layout map.  Neither v6.6.6 nor v6.6.7 with the 5 reverts
applied has this issue.  Note that NFS root seems to be key for some
reason.

https://bugzilla.yoctoproject.org/show_bug.cgi?id=15463

Signed-off-by: Paul Gortmaker <pa...@kernel.org>
---
 arch/x86/entry/common.c          | 58 +-----------------------
 arch/x86/entry/entry_64_compat.S | 77 ++++++++++++++++++++++++++++++++
 arch/x86/include/asm/idtentry.h  |  4 --
 arch/x86/include/asm/proto.h     |  4 ++
 arch/x86/kernel/idt.c            |  2 +-
 arch/x86/xen/enlighten_pv.c      |  2 +-
 arch/x86/xen/xen-asm.S           |  2 +-
 7 files changed, 85 insertions(+), 64 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 0f22a037be66..4cec38985414 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -119,62 +119,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
        }
 }
 
-#ifdef CONFIG_IA32_EMULATION
-/**
- * int80_emulation - 32-bit legacy syscall entry
- *
- * This entry point can be used by 32-bit and 64-bit programs to perform
- * 32-bit system calls.  Instances of INT $0x80 can be found inline in
- * various programs and libraries.  It is also used by the vDSO's
- * __kernel_vsyscall fallback for hardware that doesn't support a faster
- * entry method.  Restarted 32-bit system calls also fall back to INT
- * $0x80 regardless of what instruction was originally used to do the
- * system call.
- *
- * This is considered a slow path.  It is not used by most libc
- * implementations on modern hardware except during process startup.
- *
- * The arguments for the INT $0x80 based syscall are on stack in the
- * pt_regs structure:
- *   eax:                              system call number
- *   ebx, ecx, edx, esi, edi, ebp:     arg1 - arg 6
- */
-DEFINE_IDTENTRY_RAW(int80_emulation)
-{
-       int nr;
-
-       /* Establish kernel context. */
-       enter_from_user_mode(regs);
-
-       instrumentation_begin();
-       add_random_kstack_offset();
-
-       /*
-        * The low level idtentry code pushed -1 into regs::orig_ax
-        * and regs::ax contains the syscall number.
-        *
-        * User tracing code (ptrace or signal handlers) might assume
-        * that the regs::orig_ax contains a 32-bit number on invoking
-        * a 32-bit syscall.
-        *
-        * Establish the syscall convention by saving the 32bit truncated
-        * syscall number in regs::orig_ax and by invalidating regs::ax.
-        */
-       regs->orig_ax = regs->ax & GENMASK(31, 0);
-       regs->ax = -ENOSYS;
-
-       nr = syscall_32_enter(regs);
-
-       local_irq_enable();
-       nr = syscall_enter_from_user_mode_work(regs, nr);
-       do_syscall_32_irqs_on(regs, nr);
-
-       instrumentation_end();
-       syscall_exit_to_user_mode(regs);
-}
-#else /* CONFIG_IA32_EMULATION */
-
-/* Handles int $0x80 on a 32bit kernel */
+/* Handles int $0x80 */
 __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 {
        int nr = syscall_32_enter(regs);
@@ -193,7 +138,6 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
        instrumentation_end();
        syscall_exit_to_user_mode(regs);
 }
-#endif /* !CONFIG_IA32_EMULATION */
 
 static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 {
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 306181e4fcb9..245697eb8485 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -277,3 +277,80 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
        ANNOTATE_NOENDBR
        int3
 SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction.  INT $0x80 lands here.
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls.  Instances of INT $0x80 can be found inline in
+ * various programs and libraries.  It is also used by the vDSO's
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
+ * entry method.  Restarted 32-bit system calls also fall back to INT
+ * $0x80 regardless of what instruction was originally used to do the
+ * system call.
+ *
+ * This is considered a slow path.  It is not used by most libc
+ * implementations on modern hardware except during process startup.
+ *
+ * Arguments:
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  arg6
+ */
+SYM_CODE_START(entry_INT80_compat)
+       UNWIND_HINT_ENTRY
+       ENDBR
+       /*
+        * Interrupts are off on entry.
+        */
+       ASM_CLAC                        /* Do this early to minimize exposure */
+       ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
+
+       /*
+        * User tracing code (ptrace or signal handlers) might assume that
+        * the saved RAX contains a 32-bit number when we're invoking a 32-bit
+        * syscall.  Just in case the high bits are nonzero, zero-extend
+        * the syscall number.  (This could almost certainly be deleted
+        * with no ill effects.)
+        */
+       movl    %eax, %eax
+
+       /* switch to thread stack expects orig_ax and rdi to be pushed */
+       pushq   %rax                    /* pt_regs->orig_ax */
+
+       /* Need to switch before accessing the thread stack. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+
+       /* In the Xen PV case we already run on the thread stack. */
+       ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
+
+       movq    %rsp, %rax
+       movq    PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
+
+       pushq   5*8(%rax)               /* regs->ss */
+       pushq   4*8(%rax)               /* regs->rsp */
+       pushq   3*8(%rax)               /* regs->eflags */
+       pushq   2*8(%rax)               /* regs->cs */
+       pushq   1*8(%rax)               /* regs->ip */
+       pushq   0*8(%rax)               /* regs->orig_ax */
+.Lint80_keep_stack:
+
+       PUSH_AND_CLEAR_REGS rax=$-ENOSYS
+       UNWIND_HINT_REGS
+
+       cld
+
+       IBRS_ENTER
+       UNTRAIN_RET
+
+       movq    %rsp, %rdi
+       call    do_int80_syscall_32
+       jmp     swapgs_restore_regs_and_return_to_usermode
+SYM_CODE_END(entry_INT80_compat)
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 13639e57e1f8..05fd175cec7d 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -569,10 +569,6 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD,          exc_invalid_op);
 DECLARE_IDTENTRY_RAW(X86_TRAP_BP,              exc_int3);
 DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF,    exc_page_fault);
 
-#if defined(CONFIG_IA32_EMULATION)
-DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR,      int80_emulation);
-#endif
-
 #ifdef CONFIG_X86_MCE
 #ifdef CONFIG_X86_64
 DECLARE_IDTENTRY_MCE(X86_TRAP_MC,      exc_machine_check);
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 84294b66b916..12ef86b19910 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -32,6 +32,10 @@ void entry_SYSCALL_compat(void);
 void entry_SYSCALL_compat_safe_stack(void);
 void entry_SYSRETL_compat_unsafe_stack(void);
 void entry_SYSRETL_compat_end(void);
+void entry_INT80_compat(void);
+#ifdef CONFIG_XEN_PV
+void xen_entry_INT80_compat(void);
+#endif
 #endif
 
 void x86_configure_nx(void);
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index fc77a96040b7..b786d48f5a0f 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -117,7 +117,7 @@ static const __initconst struct idt_data def_idts[] = {
 
        SYSG(X86_TRAP_OF,               asm_exc_overflow),
 #if defined(CONFIG_IA32_EMULATION)
-       SYSG(IA32_SYSCALL_VECTOR,       asm_int80_emulation),
+       SYSG(IA32_SYSCALL_VECTOR,       entry_INT80_compat),
 #elif defined(CONFIG_X86_32)
        SYSG(IA32_SYSCALL_VECTOR,       entry_INT80_32),
 #endif
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index aeb33e0a3f76..bbbfdd495ebd 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -704,7 +704,7 @@ static struct trap_array_entry trap_array[] = {
        TRAP_ENTRY(exc_int3,                            false ),
        TRAP_ENTRY(exc_overflow,                        false ),
 #ifdef CONFIG_IA32_EMULATION
-       TRAP_ENTRY(int80_emulation,                     false ),
+       { entry_INT80_compat,          xen_entry_INT80_compat,          false },
 #endif
        TRAP_ENTRY(exc_page_fault,                      false ),
        TRAP_ENTRY(exc_divide_error,                    false ),
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 1a9cd18dfbd3..9e5e68008785 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -156,7 +156,7 @@ xen_pv_trap asm_xenpv_exc_machine_check
 #endif /* CONFIG_X86_MCE */
 xen_pv_trap asm_exc_simd_coprocessor_error
 #ifdef CONFIG_IA32_EMULATION
-xen_pv_trap asm_int80_emulation
+xen_pv_trap entry_INT80_compat
 #endif
 xen_pv_trap asm_exc_xen_unknown_trap
 xen_pv_trap asm_exc_xen_hypervisor_callback
-- 
2.44.0

-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#13840): 
https://lists.yoctoproject.org/g/linux-yocto/message/13840
Mute This Topic: https://lists.yoctoproject.org/mt/105625701/21656
Group Owner: linux-yocto+ow...@lists.yoctoproject.org
Unsubscribe: https://lists.yoctoproject.org/g/linux-yocto/unsub 
[arch...@mail-archive.com]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to