I've filed this as bug 6774 on the LLVM trac
(http://llvm.org/bugs/show_bug.cgi?id=6774), so the issue at least
stands a chance of being fixed.

On 2 April 2010 16:02, Max Bolingbroke <[email protected]> wrote:
> Hi,
>
> I've narrowed down one of the problems with the LLVM backend that
> David descried in his thesis. LLVM sometimes generates gratuitous
> extra stack manipulations.
>
> For example, consider this input program:
>
> {{{
> module Toy where
>
> toy :: (Int -> Maybe Int) -> Int
> toy f = case f 10 of Just x -> x; Nothing -> 20
> }}}
>
> Compiled using ghc -O (which runs llvl's "opt -O2"), we get the
> following LLVM IR:
>
> {{{
> define cc10 void @Toy_toy_entry(i32 %stg_terei_baseArg, i32
> %stg_terei_spArg, i32 %stg_terei_hpArg, i32 %stg_terei_r1Arg) nounwind
> {
> cdx:
>  %ndz = add i32 %stg_terei_spArg, -4             ; <i32> [#uses=3]
>  %ndB = add i32 %stg_terei_baseArg, 84           ; <i32> [#uses=1]
>  %ndC = inttoptr i32 %ndB to i32*                ; <i32*> [#uses=1]
>  %ndD = load i32* %ndC                           ; <i32> [#uses=1]
>  %ndE = icmp ult i32 %ndz, %ndD                  ; <i1> [#uses=1]
>  br i1 %ndE, label %cdG, label %ndH
>
> ndH:                                              ; preds = %cdx
>  %ndJ = inttoptr i32 %stg_terei_spArg to i32*    ; <i32*> [#uses=2]
>  %ndK = load i32* %ndJ                           ; <i32> [#uses=1]
>  %ndP = inttoptr i32 %ndz to i32*                ; <i32*> [#uses=1]
>  store i32 add (i32 ptrtoint (%Toy_toy1_closure_struct*
> @Toy_toy2_closure to i32), i32 1), i32* %ndP
>  store i32 ptrtoint (%sbo_info_struct* @sbo_info to i32), i32* %ndJ
>  tail call cc10 void @stg_ap_p_fast(i32 %stg_terei_baseArg, i32 %ndz,
> i32 %stg_terei_hpArg, i32 %ndK) nounwind
>  ret void
>
> cdG:                                              ; preds = %cdx
>  %ne1 = add i32 %stg_terei_baseArg, -4           ; <i32> [#uses=1]
>  %ne2 = inttoptr i32 %ne1 to i32*                ; <i32*> [#uses=1]
>  %ne3 = load i32* %ne2                           ; <i32> [#uses=1]
>  %ne4 = inttoptr i32 %ne3 to void (i32, i32, i32, i32)* ; <void (i32,
> i32, i32, i32)*> [#uses=1]
>  tail call cc10 void %ne4(i32 %stg_terei_baseArg, i32
> %stg_terei_spArg, i32 %stg_terei_hpArg, i32 ptrtoint
> (%Toy_toy_closure_struct* @Toy_toy_closure to i32)) nounwind
>  ret void
> }
> }}}
>
> So far so good. However, when we compile this with "llc -tailcallopt
> -O2 -march=x86", we get:
>
> {{{
> _Toy_toy_entry:                         ## @Toy_toy_entry
> ## BB#0:                                ## %cdx
>        subl    $12, %esp
>        leal    -4(%ebp), %eax
>        cmpl    84(%ebx), %eax
>        jb      LBB2_2
> ## BB#1:                                ## %ndH
>        movl    (%ebp), %esi
>        movl    $_Toy_toy2_closure+1, -4(%ebp)
>        movl    $_sbo_info, (%ebp)
>        movl    %eax, %ebp
>        addl    $12, %esp
>        jmp     _stg_ap_p_fast  # TAILCALL
> LBB2_2:                                 ## %cdG
>        movl    -4(%ebx), %eax
>        movl    $_Toy_toy_closure, %esi
>        addl    $12, %esp
>        jmpl    *%eax  # TAILCALL
>
>        .globl  ___stginit_Toy_
>        .align  4, 0x90
> }}}
>
> Note the "subl  $12, %esp" and "addl    $12, %esp" pairs. %esp is never
> actually used, so this fiddling is entirely pointless! This seems to
> happen in every single function LLVM compiles, so I imagine fixing the
> problem would be quite a big win.
>
> Without -tailcallopt, we get nice code:
>
> {{{
> _Toy_toy_entry:                         ## @Toy_toy_entry
> ## BB#0:                                ## %cdx
>        leal    -4(%ebp), %eax
>        cmpl    84(%ebx), %eax
>        jb      LBB2_2
> ## BB#1:                                ## %ndH
>        movl    (%ebp), %esi
>        movl    $_Toy_toy2_closure+1, -4(%ebp)
>        movl    $_sbo_info, (%ebp)
>        movl    %eax, %ebp
>        jmp     _stg_ap_p_fast  # TAILCALL
> LBB2_2:                                 ## %cdG
>        movl    -4(%ebx), %eax
>        movl    $_Toy_toy_closure, %esi
>        jmpl    *%eax  # TAILCALL
> }}}
>
> Unfortunately -tailcallopt is apparently the only way to get
> *guaranteed* tail calls. The LLVM developers appear to know that
> tailcallopt causes this sort of rubbish in the output
> (http://old.nabble.com/Removing--tailcallopt--td27475523.html) but
> obviously haven't fixed it (my results are with the very recent LLVM
> r100183).
>
> I couldn't get the stack manipulations to go away with any amount of
> "noreturn" and "unreachable" annotations around "tail call"s and
> function definitions. It was worth a try though :-)
>
> In summary, it looks like anyone wanting to fix the excess stack
> manipulations issue is going to have to get their hands messy and
> delve into LLVM's LLC!
>
> Cheers,
> Max
>

_______________________________________________
Cvs-ghc mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/cvs-ghc

Reply via email to