[Bug tree-optimization/39821] 120% slowdown with vectorizer

2009-04-20 Thread ramiro86 at hotmail dot com


--- Comment #3 from ramiro86 at hotmail dot com  2009-04-21 00:08 ---
Created an attachment (id=17660)
 -- (http://gcc.gnu.org/bugzilla/attachment.cgi?id=17660&action=view)
tarball of a simple testcase


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39821



[Bug tree-optimization/39821] 120% slowdown with vectorizer

2009-04-20 Thread ramiro86 at hotmail dot com


--- Comment #4 from ramiro86 at hotmail dot com  2009-04-21 00:10 ---
I've attached a simple testcase. The system I'm running this on is a q6600 with
64-bit Linux.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39821



[Bug tree-optimization/39821] New: 120% slowdown with vectorizer

2009-04-19 Thread ramiro86 at hotmail dot com
 156:   75 e8   jne    140 <dotproduct+0x140>
 158:   f3 c3   repz retq 
 15a:   66 0f 1f 44 00 00   nopw   0x0(%rax,%rax,1)

0000000000000160 <dotproduct_order4>:
 160:   66 0f ef c0 pxor   %xmm0,%xmm0
 164:   f3 0f 6f 0f movdqu (%rdi),%xmm1
 168:   f3 0f 6f 1e movdqu (%rsi),%xmm3
 16c:   66 0f 6f d0 movdqa %xmm0,%xmm2
 170:   66 0f 6f f1 movdqa %xmm1,%xmm6
 174:   66 0f 66 c1 pcmpgtd %xmm1,%xmm0
 178:   66 0f 6f fb movdqa %xmm3,%xmm7
 17c:   66 0f 66 d3 pcmpgtd %xmm3,%xmm2
 180:   66 0f 62 f0 punpckldq %xmm0,%xmm6
 184:   66 0f 62 fa punpckldq %xmm2,%xmm7
 188:   66 0f 6a da punpckhdq %xmm2,%xmm3
 18c:   66 0f 6a c8 punpckhdq %xmm0,%xmm1
 190:   66 0f 6f ee movdqa %xmm6,%xmm5
 194:   66 44 0f 6f c7  movdqa %xmm7,%xmm8
 199:   66 0f 6f e7 movdqa %xmm7,%xmm4
 19d:   66 0f 6f c3 movdqa %xmm3,%xmm0
 1a1:   66 0f 73 d5 20  psrlq  $0x20,%xmm5
 1a6:   66 44 0f f4 c6  pmuludq %xmm6,%xmm8
 1ab:   66 0f f4 ef pmuludq %xmm7,%xmm5
 1af:   66 0f 6f d1 movdqa %xmm1,%xmm2
 1b3:   66 0f 73 d4 20  psrlq  $0x20,%xmm4
 1b8:   66 0f 73 f5 20  psllq  $0x20,%xmm5
 1bd:   66 0f f4 e6 pmuludq %xmm6,%xmm4
 1c1:   66 41 0f d4 e8  paddq  %xmm8,%xmm5
 1c6:   66 0f 73 f4 20  psllq  $0x20,%xmm4
 1cb:   66 0f d4 e5 paddq  %xmm5,%xmm4
 1cf:   66 0f 6f eb movdqa %xmm3,%xmm5
 1d3:   66 0f f4 c1 pmuludq %xmm1,%xmm0
 1d7:   66 0f 73 d2 20  psrlq  $0x20,%xmm2
 1dc:   66 0f f4 d3 pmuludq %xmm3,%xmm2
 1e0:   66 0f 73 f2 20  psllq  $0x20,%xmm2
 1e5:   66 0f d4 c2 paddq  %xmm2,%xmm0
 1e9:   66 0f 73 d5 20  psrlq  $0x20,%xmm5
 1ee:   66 0f f4 cd pmuludq %xmm5,%xmm1
 1f2:   66 0f 73 f1 20  psllq  $0x20,%xmm1
 1f7:   66 0f d4 c1 paddq  %xmm1,%xmm0
 1fb:   66 0f d4 c4 paddq  %xmm4,%xmm0
 1ff:   66 0f 6f c8 movdqa %xmm0,%xmm1
 203:   66 0f 73 d9 08  psrldq $0x8,%xmm1
 208:   66 0f d4 c1 paddq  %xmm1,%xmm0
 20c:   66 0f d6 44 24 f8   movq   %xmm0,-0x8(%rsp)
 212:   48 8b 44 24 f8  mov    -0x8(%rsp),%rax
 217:   c3  retq   
$ objdump -d dotproduct-no-vectorize.o

dotproduct-no-vectorize.o: file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <dotproduct>:
   0:   31 c0   xor    %eax,%eax
   2:   85 d2   test   %edx,%edx
   4:   74 2a   je     30 <dotproduct+0x30>
   6:   83 ea 01    sub    $0x1,%edx
   9:   4c 8d 0c 95 04 00 00    lea    0x4(,%rdx,4),%r9
  10:   00 
  11:   31 d2   xor    %edx,%edx
  13:   0f 1f 44 00 00  nopl   0x0(%rax,%rax,1)
  18:   48 63 0c 16 movslq (%rsi,%rdx,1),%rcx
  1c:   4c 63 04 17 movslq (%rdi,%rdx,1),%r8
  20:   48 83 c2 04 add    $0x4,%rdx
  24:   49 0f af c8 imul   %r8,%rcx
  28:   48 01 c8    add    %rcx,%rax
  2b:   4c 39 ca    cmp    %r9,%rdx
  2e:   75 e8   jne    18 <dotproduct+0x18>
  30:   f3 c3   repz retq 
  32:   66 66 66 66 66 2e 0f    nopw   %cs:0x0(%rax,%rax,1)
  39:   1f 84 00 00 00 00 00 

0000000000000040 <dotproduct_order4>:
  40:   48 63 07    movslq (%rdi),%rax
  43:   48 63 16    movslq (%rsi),%rdx
  46:   48 63 4f 04 movslq 0x4(%rdi),%rcx
  4a:   48 0f af d0 imul   %rax,%rdx
  4e:   48 63 46 04 movslq 0x4(%rsi),%rax
  52:   48 0f af c1 imul   %rcx,%rax
  56:   48 63 4f 08 movslq 0x8(%rdi),%rcx
  5a:   48 01 c2    add    %rax,%rdx
  5d:   48 63 46 08 movslq 0x8(%rsi),%rax
  61:   48 0f af c1 imul   %rcx,%rax
  65:   48 63 4f 0c movslq 0xc(%rdi),%rcx
  69:   48 01 c2    add    %rax,%rdx
  6c:   48 63 46 0c movslq 0xc(%rsi),%rax
  70:   48 0f af c1 imul   %rcx,%rax
  74:   48 01 d0    add    %rdx,%rax
  77:   c3  retq


-- 
   Summary: 120% slowdown with vectorizer
   Product: gcc
   Version: 4.4.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: ramiro86 at hotmail dot com
GCC target triplet: x86_64-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39821