[ This version of the patch passed all my tests! ]

From: Peter Zijlstra <[email protected]>

In order to allow breakpoints to emulate call functions, they need to push
the return address onto the stack. But because the breakpoint exception
frame is added to the stack when the breakpoint is hit, there's no room to
add the address onto the stack and return to the address of the emulated
called function.

To handle this, copy the exception frame on entry of the breakpoint handler
and leave a gap that can be used to add a return address to the stack
frame and return from the breakpoint to the emulated called function,
allowing for that called function to return back to the location after the
breakpoint was placed.

The following helper functions were also added:

  int3_emulate_push(): to push the address onto the gap in the stack
  int3_emulate_jmp(): changes the location of the regs->ip to return there.
  int3_emulate_call(): push the return address and change regs->ip

Cc: Andy Lutomirski <[email protected]>
Cc: Nicolai Stange <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: the arch/x86 maintainers <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Jiri Kosina <[email protected]>
Cc: Miroslav Benes <[email protected]>
Cc: Petr Mladek <[email protected]>
Cc: Joe Lawrence <[email protected]>
Cc: Shuah Khan <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Sebastian Andrzej Siewior <[email protected]>
Cc: Mimi Zohar <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Nick Desaulniers <[email protected]>
Cc: Nayna Jain <[email protected]>
Cc: Masahiro Yamada <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: "open list:KERNEL SELFTEST FRAMEWORK" <[email protected]>
Cc: [email protected]
Fixes: b700e7f03df5 ("livepatch: kernel: add support for live patching")
Signed-off-by: *** Need Peter Zijlstra's SoB here! ***
Signed-off-by: Steven Rostedt (VMware) <[email protected]>
---

Changes since v1:

 - Updated the 32bit code with Peter's latest changes
 - Added int3 stack check in kernel_stack_pointer()

 arch/x86/entry/entry_32.S            | 117 +++++++++++++++++++++++----
 arch/x86/entry/entry_64.S            |  14 +++-
 arch/x86/include/asm/text-patching.h |  20 +++++
 arch/x86/kernel/ptrace.c             |   6 +-
 4 files changed, 139 insertions(+), 18 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index d309f30cf7af..2885acd691ac 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -67,9 +67,20 @@
 # define preempt_stop(clobbers)        DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
 # define preempt_stop(clobbers)
-# define resume_kernel         restore_all_kernel
 #endif
 
+.macro RETINT_PREEMPT
+#ifdef CONFIG_PREEMPT
+       DISABLE_INTERRUPTS(CLBR_ANY)
+       cmpl    $0, PER_CPU_VAR(__preempt_count)
+       jnz     .Lend_\@
+       testl   $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
+       jz      .Lend_\@
+       call    preempt_schedule_irq
+.Lend_\@:
+#endif
+.endm
+
 .macro TRACE_IRQS_IRET
 #ifdef CONFIG_TRACE_IRQFLAGS
        testl   $X86_EFLAGS_IF, PT_EFLAGS(%esp)     # interrupts off?
@@ -753,7 +764,7 @@ ret_from_intr:
        andl    $SEGMENT_RPL_MASK, %eax
 #endif
        cmpl    $USER_RPL, %eax
-       jb      resume_kernel                   # not returning to v8086 or userspace
+       jb      restore_all_kernel              # not returning to v8086 or userspace
 
 ENTRY(resume_userspace)
        DISABLE_INTERRUPTS(CLBR_ANY)
@@ -763,19 +774,6 @@ ENTRY(resume_userspace)
        jmp     restore_all
 END(ret_from_exception)
 
-#ifdef CONFIG_PREEMPT
-ENTRY(resume_kernel)
-       DISABLE_INTERRUPTS(CLBR_ANY)
-.Lneed_resched:
-       cmpl    $0, PER_CPU_VAR(__preempt_count)
-       jnz     restore_all_kernel
-       testl   $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
-       jz      restore_all_kernel
-       call    preempt_schedule_irq
-       jmp     .Lneed_resched
-END(resume_kernel)
-#endif
-
 GLOBAL(__begin_SYSENTER_singlestep_region)
 /*
  * All code from here through __end_SYSENTER_singlestep_region is subject
@@ -1026,6 +1024,7 @@ restore_all:
        INTERRUPT_RETURN
 
 restore_all_kernel:
+       RETINT_PREEMPT
        TRACE_IRQS_IRET
        PARANOID_EXIT_TO_KERNEL_MODE
        BUG_IF_WRONG_CR3
@@ -1476,6 +1475,94 @@ END(nmi)
 
 ENTRY(int3)
        ASM_CLAC
+
+#ifdef CONFIG_VM86
+       testl   $X86_EFLAGS_VM, 8(%esp)
+       jnz     .Lfrom_usermode_no_gap
+#endif
+       testl   $SEGMENT_RPL_MASK, 4(%esp)
+       jnz     .Lfrom_usermode_no_gap
+
+       /*
+        * Here from kernel mode; so the (exception) stack looks like:
+        *
+        * 12(esp) - <previous context>
+        *  8(esp) - flags
+        *  4(esp) - cs
+        *  0(esp) - ip
+        *
+        * Let's build a 5 entry IRET frame after that, such that struct pt_regs
+        * is complete and in particular regs->sp is correct. This gives us
+        * the original 3 entries as a gap:
+        *
+        * 32(esp) - <previous context>
+        * 28(esp) - orig_flags / gap
+        * 24(esp) - orig_cs    / gap
+        * 20(esp) - orig_ip    / gap
+        * 16(esp) - ss
+        * 12(esp) - sp
+        *  8(esp) - flags
+        *  4(esp) - cs
+        *  0(esp) - ip
+        */
+       pushl   %ss       # ss
+       pushl   %esp      # sp (points at ss)
+       pushl   4*4(%esp) # flags
+       pushl   4*4(%esp) # cs
+       pushl   4*4(%esp) # ip
+
+       add     $16, 12(%esp) # point sp back at the previous context
+
+       pushl   $-1                             # orig_eax; mark as interrupt
+
+       SAVE_ALL
+       ENCODE_FRAME_POINTER
+       TRACE_IRQS_OFF
+       xorl    %edx, %edx                      # zero error code
+       movl    %esp, %eax                      # pt_regs pointer
+       call    do_int3
+
+       RETINT_PREEMPT
+       TRACE_IRQS_IRET
+       /*
+        * If we really never INT3 from entry code, it looks like
+        * we can skip this one.
+       PARANOID_EXIT_TO_KERNEL_MODE
+        */
+       BUG_IF_WRONG_CR3
+       RESTORE_REGS 4                          # consume orig_eax
+
+       /*
+        * Reconstruct the 3 entry IRET frame right after the (modified)
+        * regs->sp without lowering %esp in between, such that an NMI in the
+        * middle doesn't scribble our stack.
+        */
+
+       pushl   %eax
+       pushl   %ecx
+       movl    5*4(%esp), %eax         # (modified) regs->sp
+
+       movl    4*4(%esp), %ecx         # flags
+       movl    %ecx, -4(%eax)
+
+       movl    3*4(%esp), %ecx         # cs
+       andl    $0x0000ffff, %ecx
+       movl    %ecx, -8(%eax)
+
+       movl    2*4(%esp), %ecx         # ip
+       movl    %ecx, -12(%eax)
+
+       movl    1*4(%esp), %ecx         # eax
+       movl    %ecx, -16(%eax)
+
+       popl    %ecx
+       lea     -16(%eax), %esp
+       popl    %eax
+
+       jmp     .Lirq_return
+
+.Lfrom_usermode_no_gap:
+
        pushl   $-1                             # mark this as an int
 
        SAVE_ALL switch_stacks=1
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1f0efdb7b629..834ec1397dab 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -879,7 +879,7 @@ apicinterrupt IRQ_WORK_VECTOR                       irq_work_interrupt              smp_irq_work_interrupt
  * @paranoid == 2 is special: the stub will never switch stacks.  This is for
  * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
  */
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0
 ENTRY(\sym)
        UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
@@ -899,6 +899,16 @@ ENTRY(\sym)
        jnz     .Lfrom_usermode_switch_stack_\@
        .endif
 
+       .if \create_gap == 1
+       testb   $3, CS-ORIG_RAX(%rsp)
+       jnz     .Lfrom_usermode_no_gap_\@
+       .rept 6
+       pushq   5*8(%rsp)
+       .endr
+       UNWIND_HINT_IRET_REGS offset=8
+.Lfrom_usermode_no_gap_\@:
+       .endif
+
        .if \paranoid
        call    paranoid_entry
        .else
@@ -1130,7 +1140,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
 #endif /* CONFIG_HYPERV */
 
 idtentry debug                 do_debug                has_error_code=0        paranoid=1 shift_ist=DEBUG_STACK
-idtentry int3                  do_int3                 has_error_code=0
+idtentry int3                  do_int3                 has_error_code=0        create_gap=1
 idtentry stack_segment         do_stack_segment        has_error_code=1
 
 #ifdef CONFIG_XEN_PV
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index e85ff65c43c3..ba275b6292db 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -39,4 +39,24 @@ extern int poke_int3_handler(struct pt_regs *regs);
 extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
 extern int after_bootmem;
 
+static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
+{
+       regs->sp -= sizeof(unsigned long);
+       *(unsigned long *)regs->sp = val;
+}
+
+static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
+{
+       regs->ip = ip;
+}
+
+#define INT3_INSN_SIZE 1
+#define CALL_INSN_SIZE 5
+
+static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
+{
+       int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
+       int3_emulate_jmp(regs, func);
+}
+
 #endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 4b8ee05dd6ad..600ead178bf4 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -171,8 +171,12 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs)
        unsigned long sp = (unsigned long)&regs->sp;
        u32 *prev_esp;
 
-       if (context == (sp & ~(THREAD_SIZE - 1)))
+       if (context == (sp & ~(THREAD_SIZE - 1))) {
+               /* int3 code adds a gap */
+               if (sp == regs->sp - 5*4)
+                       return regs->sp;
                return sp;
+       }
 
        prev_esp = (u32 *)(context);
        if (*prev_esp)
-- 
2.20.1

Reply via email to