#2253: Native code generator could do better
-----------------------------------------+----------------------------------
    Reporter:  dons                      |       Owner:                
        Type:  run-time performance bug  |      Status:  new           
    Priority:  normal                    |   Component:  Compiler (NCG)
     Version:  6.8.2                     |    Severity:  normal        
    Keywords:                            |    Testcase:                
Architecture:  x86_64 (amd64)            |          Os:  Unknown       
-----------------------------------------+----------------------------------
 An example set of programs that came up in the ndp library, where the C
 backend outperforms
 the current native code generator. Logging them here so we don't forget to
 check again with
 the new backend.

 == Program 1 ==

 {{{

 import Data.Array.Vector
 import Data.Bits
 main = print . sumU $ zipWith3U (\x y z -> x * y * z)
                         (enumFromToU 1 (100000000 :: Int))
                         (enumFromToU 2 (100000001 :: Int))
                         (enumFromToU 7 (100000008 :: Int))

 }}}

 Core:

 {{{

 Main.$s$wfold =
   \ (sc_sPH :: Int#)
     (sc1_sPI :: Int#)
     (sc2_sPJ :: Int#)
     (sc3_sPK :: Int#) ->
     case ># sc2_sPJ 100000000 of wild_aJo {
       False ->
         case ># sc1_sPI 100000001 of wild1_XK6 {
           False ->
             case ># sc_sPH 100000008 of wild2_XKd {
               False ->
                 Main.$s$wfold
                   (+# sc_sPH 1)
                   (+# sc1_sPI 1)
                   (+# sc2_sPJ 1)
                   (+# sc3_sPK (*# (*# sc2_sPJ sc1_sPI) sc_sPH));
               True -> sc3_sPK
             };
           True -> sc3_sPK
         };
       True -> sc3_sPK
     }

 }}}}

 Which is great.

 C backend:

 {{{

 Main_zdszdwfold_info:
   .text
   .p2align 4,,15
 .text
   .align 8
   .type     Main_zdszdwfold_info, @function
   cmpq        $100000000, %r8
   jg  .L9
   cmpq        $100000001, %rdi
   jg  .L9
   cmpq        $100000008, %rsi
   jg  .L9
   movq        %r8, %rdx
   incq        %r8
   imulq       %rdi, %rdx
   incq        %rdi
   imulq       %rsi, %rdx
   incq        %rsi
   addq        %rdx, %r9
   jmp Main_zdszdwfold_info
 .L5:
 .L7:
   .p2align 6,,7
 .L9:
   movq        %r9, %rbx
   jmp *(%rbp)


 }}}


 Native code generator:


 {{{

 Main_zdszdwfold_info:
   cmpq $100000000,%r8
   jg .LcRP
   cmpq $100000001,%rdi
   jg .LcRR
   cmpq $100000008,%rsi
   jg .LcRU
   movq %rdi,%rax
   imulq %rsi,%rax
   movq %r8,%rcx
   imulq %rax,%rcx
   movq %r9,%rax
   addq %rcx,%rax
   leaq 1(%r8),%rcx
   leaq 1(%rdi),%rdx
   incq %rsi
   movq %rdx,%rdi
   movq %rcx,%r8
   movq %rax,%r9
   jmp Main_zdszdwfold_info
 .LcRP:
   movq %r9,%rbx
   jmp *(%rbp)
 .LcRR:
   movq %r9,%rbx
   jmp *(%rbp)
 .LcRU:
   movq %r9,%rbx
   jmp *(%rbp)

 }}}

 Runtime performance:

   C backend:    0.269
   Asm backend:  0.410s


 == Program 2 ==

 Source:

 {{{

 import Data.Array.Vector
 import Data.Bits
 main = print . sumU . mapU (`shiftL` 2) $
             appendU (replicateU 1000000000 (1::Int))
                     (replicateU 1000000000 (7::Int))

 }}}

 Core:

 {{{

 $s$wfold_rPr =
   \ (sc_sOw :: Int#) (sc1_sOx :: Int#) ->
     case sc_sOw of wild_X1j {
       __DEFAULT -> $s$wfold_rPr (+# wild_X1j 1) (+# sc1_sOx 28);
       1000000000 -> sc1_sOx
     }
 }}}

 Runtime:

     Native backend: 2.637
     C backend:      2.365

-- 
Ticket URL: <http://hackage.haskell.org/trac/ghc/ticket/2253>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler
_______________________________________________
Glasgow-haskell-bugs mailing list
Glasgow-haskell-bugs@haskell.org
http://www.haskell.org/mailman/listinfo/glasgow-haskell-bugs

Reply via email to