On Thu, Nov 30, 2017 at 10:29:44PM -0800, Andy Lutomirski wrote:
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index caf74a1bb3de..28f4e7553c26 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -180,14 +180,24 @@ ENTRY(entry_SYSCALL_64_trampoline)
>  
>       /*
>        * x86 lacks a near absolute jump, and we can't jump to the real
> -      * entry text with a relative jump, so we fake it using retq.
> +      * entry text with a relative jump.  We could push the target
> +      * address and then use retq, but this destroys the pipeline on
> +      * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
> +      * spill RDI and restore it in a second-stage trampoline.
>        */
> -     pushq   $entry_SYSCALL_64_after_hwframe
> -     retq
> +     pushq   %rdi
> +     movq    $entry_SYSCALL_64_stage2, %rdi
> +     jmp     *%rdi
>  END(entry_SYSCALL_64_trampoline)
>  
>       .popsection
>  
> +ENTRY(entry_SYSCALL_64_stage2)
> +     UNWIND_HINT_EMPTY
> +     popq    %rdi
> +     jmp     entry_SYSCALL_64_after_hwframe
> +END(entry_SYSCALL_64_stage2)
> +
>  ENTRY(entry_SYSCALL_64)
>       UNWIND_HINT_EMPTY
>       /*

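Another crazy idea: keep the retq, but precede it with a call and
overwrite the return address that the call pushed with the real target: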

        call    1f
1:      movq    $entry_SYSCALL_64_after_hwframe, (%rsp)
        ret

Does that fix the regression?

-- 
Josh

Reply via email to