#2253: Native code generator could do better
---------------------------------+------------------------------------------
    Reporter:  dons              |       Owner:                         
        Type:  bug               |      Status:  new                    
    Priority:  normal            |   Milestone:  7.6.1                  
   Component:  Compiler (NCG)    |     Version:  6.8.2                  
    Keywords:                    |          Os:  Unknown/Multiple       
Architecture:  Unknown/Multiple  |     Failure:  Runtime performance bug
  Difficulty:  Unknown           |    Testcase:                         
   Blockedby:                    |    Blocking:                         
     Related:                    |  
---------------------------------+------------------------------------------
Changes (by simonmar):

  * blockedby:  4258 =>


Comment:

 I came to check these with the new backend, and it turns out that the old
 backend is doing just fine on these now.  It might be mostly due to this
 commit: 3d8ab554ced45c51f39951f29cc53277d5788c37.

 These are compiled with HEAD as of yesterday, with -O2.

 Program 1:

 {{{
 Main_mainzuzdszdwfoldlMzqzuloop_info:
 .Lc2vG:
         cmpq $100000000,%rsi
         jle .Lc2vM
         movq %r14,%rbx
         jmp *0(%rbp)
 .Lc2vM:
         cmpq $100000001,%rdi
         jle .Lc2vO
         movq %r14,%rbx
         jmp *0(%rbp)
 .Lc2vO:
         cmpq $100000008,%r8
         jle .Lc2vR
         movq %r14,%rbx
         jmp *0(%rbp)
 .Lc2vR:
         movq %rdi,%rbx
         imulq %r8,%rbx
         movq %rsi,%rax
         imulq %rbx,%rax
         addq %rax,%r14
         incq %rsi
         incq %rdi
         incq %r8
         jmp Main_mainzuzdszdwfoldlMzqzuloop_info
 }}}

 The new code generator does a bit better, commoning up the duplicate
 blocks:

 {{{
 Main_mainzuzdszdwfoldlMzqzuloop_info:
 .Lc2vW:
         cmpq $100000000,%rsi
         jle .Lc2wt
 .Lc2wj:
         movq %r14,%rbx
         jmp *(%rbp)
 .Lc2wt:
         cmpq $100000001,%rdi
         jg .Lc2wj
         cmpq $100000008,%r8
         jg .Lc2wj
         movq %rdi,%rbx
         imulq %r8,%rbx
         movq %rsi,%rax
         imulq %rbx,%rax
         addq %rax,%r14
         incq %rsi
         incq %rdi
         incq %r8
         jmp Main_mainzuzdszdwfoldlMzqzuloop_info
 }}}


 Program 2 (with `-O2 -fno-regs-graph`; the graph-colouring allocator
 generates slightly worse code on this one):

 {{{
 Main_mainzuzdszdwfoldlMzqzuloop_info:
 .Lc2mJ:
         testq %rsi,%rsi
         jle .Lc2mR
 .Lc2mS:
         addq $4,%r14
         decq %rsi
         jmp Main_mainzuzdszdwfoldlMzqzuloop_info
 .Lc2mR:
         movl $1000000000,%esi
         jmp r2kR_info

 r2kR_info:
 .Lc2m8:
         testq %rsi,%rsi
         jle .Lc2mg
 .Lc2mh:
         addq $28,%r14
         decq %rsi
         jmp r2kR_info
 .Lc2mg:
         movq %r14,%rbx
         jmp *(%rbp)
 }}}


 Program 3:

 {{{
 Main_mainzuzdszdwfoldlMzqzuloop_info:
 .Lc2hW:
         testq %rsi,%rsi
         jle .Lc2i1
         addq $8,%r14
         decq %rsi
         jmp Main_mainzuzdszdwfoldlMzqzuloop_info
 .Lc2i1:
         movq %r14,%rbx
         jmp *0(%rbp)
 }}}

 Program 4:

 {{{
 Main_mainzuzdszdwfoldlMzqzuloop_info:
 .Lc2lj:
         testq %rsi,%rsi
         jle .Lc2lo
         addq $36,%r14
         decq %rsi
         jmp Main_mainzuzdszdwfoldlMzqzuloop_info
 .Lc2lo:
         movq %r14,%rbx
         jmp *0(%rbp)
 }}}

 Program 5:

 {{{
 Main_mainzuzdszdwfoldlMzqzuloop_info:
 .Lc2rk:
         cmpq $100000000,%rsi
         jle .Lc2ro
         movq %r14,%rbx
         jmp *0(%rbp)
 .Lc2ro:
         cmpq $100000001,%rdi
         jle .Lc2rr
         movq %r14,%rbx
         jmp *0(%rbp)
 .Lc2rr:
         addq %rsi,%r14
         incq %rsi
         incq %rdi
         jmp Main_mainzuzdszdwfoldlMzqzuloop_info
 }}}

 Program 6:

 {{{
 Main_mainzuzdszdwfoldlMzqzuloop_info:
 .Lc2tu:
         testq %r14,%r14
         jle .Lc2tA
         cmpq $39999999,%rsi
         jle .Lc2tD
         jmp *0(%rbp)
 .Lc2tA:
         jmp *0(%rbp)
 .Lc2tD:
         cvtsi2sdq %rsi,%xmm0
         movsd .Ln2tF(%rip),%xmm1
         mulsd %xmm0,%xmm1
         addsd %xmm1,%xmm5
         decq %r14
         incq %rsi
         jmp Main_mainzuzdszdwfoldlMzqzuloop_info
 }}}

 We still need the strength reduction; I'll make a separate ticket for
 that.

-- 
Ticket URL: <http://hackage.haskell.org/trac/ghc/ticket/2253#comment:16>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler

_______________________________________________
Glasgow-haskell-bugs mailing list
Glasgow-haskell-bugs@haskell.org
http://www.haskell.org/mailman/listinfo/glasgow-haskell-bugs

Reply via email to