On Fri, 24 Nov 2017, Ingo Molnar wrote:
> From: Andy Lutomirski <[email protected]>
> @@ -563,6 +563,13 @@ END(irq_entries_start)
>  /* 0(%rsp): ~(interrupt number) */
>       .macro interrupt func
>       cld
> +
> +     testb   $3, CS-ORIG_RAX(%rsp)
> +     jz      1f
> +     SWAPGS
> +     call    switch_to_thread_stack
> +1:

Yes, that's what I thought it should look like.

>       ALLOC_PT_GPREGS_ON_STACK
>       SAVE_C_REGS
>       SAVE_EXTRA_REGS
> @@ -572,12 +579,8 @@ END(irq_entries_start)
>       jz      1f

If you change that to 2f and adjust the label down there, it gets even
simpler to read. I know reusing the 1: label works, but I still find it
disturbing.

>       /*
> -      * IRQ from user mode.  Switch to kernel gsbase and inform context
> -      * tracking that we're in kernel mode.
> -      */
> -     SWAPGS
> -
> -     /*
> +      * IRQ from user mode.
> +      *
>        * We need to tell lockdep that IRQs are off.  We can't do this until
>        * we fix gsbase, and we should do it before enter_from_user_mode
>        * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
> @@ -831,6 +834,32 @@ apicinterrupt IRQ_WORK_VECTOR                    irq_work_interrupt              smp_irq_work_interrupt
>   */
>  #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
>  
> +/*
> + * Switch to the thread stack.  This is called with the IRET frame and
> + * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
> + * space has not been allocated for them.)
> + */
> +ENTRY(switch_to_thread_stack)
> +     UNWIND_HINT_FUNC
> +
> +     pushq   %rdi
> +     movq    %rsp, %rdi
> +     movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
> +     UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
> +
> +     pushq   7*8(%rdi)               /* regs->ss */
> +     pushq   6*8(%rdi)               /* regs->rsp */
> +     pushq   5*8(%rdi)               /* regs->eflags */
> +     pushq   4*8(%rdi)               /* regs->cs */
> +     pushq   3*8(%rdi)               /* regs->ip */
> +     pushq   2*8(%rdi)               /* regs->orig_ax */
> +     pushq   8(%rdi)                 /* return address */
> +     UNWIND_HINT_FUNC
> +
> +     movq    (%rdi), %rdi
> +     ret
> +END(switch_to_thread_stack)

Much nicer.

>  .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
>  ENTRY(\sym)
>       UNWIND_HINT_IRET_REGS offset=\has_error_code*8
> @@ -848,11 +877,12 @@ ENTRY(\sym)
>  
>       ALLOC_PT_GPREGS_ON_STACK
>  
> -     .if \paranoid
> -     .if \paranoid == 1
> +     .if \paranoid < 2
>       testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
> -     jnz     1f
> +     jnz     .Lfrom_usermode_switch_stack_\@
>       .endif
> +
> +     .if \paranoid
>       call    paranoid_entry
>       .else
>       call    error_entry
> @@ -894,20 +924,15 @@ ENTRY(\sym)
>       jmp     error_exit
>       .endif
>  
> -     .if \paranoid == 1
> +     .if \paranoid < 2
>       /*
> -      * Paranoid entry from userspace.  Switch stacks and treat it
> +      * Entry from userspace.  Switch stacks and treat it
>        * as a normal entry.  This means that paranoid handlers
>        * run in real process context if user_mode(regs).
>        */
> -1:
> +.Lfrom_usermode_switch_stack_\@:
>       call    error_entry
>  
> -
> -     movq    %rsp, %rdi                      /* pt_regs pointer */
> -     call    sync_regs
> -     movq    %rax, %rsp                      /* switch stack */
> -
>       movq    %rsp, %rdi                      /* pt_regs pointer */
>  
>       .if \has_error_code
> @@ -1170,6 +1195,14 @@ ENTRY(error_entry)
>       SWAPGS
>  
>  .Lerror_entry_from_usermode_after_swapgs:
> +     /* Put us onto the real thread stack. */
> +     popq    %r12                            /* save return addr in %r12 */
> +     movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
> +     call    sync_regs
> +     movq    %rax, %rsp                      /* switch stack */
> +     ENCODE_FRAME_POINTER
> +     pushq   %r12
> +

Reviewed-by: Thomas Gleixner <[email protected]>

Reply via email to