http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49457
Summary: integer comparison does not vectorize
Product: gcc
Version: 4.7.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
AssignedTo: unassig...@gcc.gnu.org
ReportedBy: vincenzo.innoce...@cern.ch

float __attribute__ ((aligned(16))) a[1024];
float __attribute__ ((aligned(16))) b[1024];

// does not vectorize
void tVi() {
  for (int i=0; i!=1024; ++i) {
    int j = a[i];
    b[i] = (j==0) ? a[i] : - a[i];
  }
}

// nicely vectorize...
void tVf() {
  for (int i=0; i!=1024; ++i) {
    int j = a[i];
    float z = j;
    b[i] = (z==0) ? a[i] : - a[i];
  }
}

c++ -Ofast -c testVectCond.cpp -msse4.2
otool -V -t testVectCond.o | c++filt

testVectCond.o:
(__TEXT,__text) section
tVi():
0000000000000000 xorl %eax,%eax
0000000000000002 movss 0x00000096(%rip),%xmm2
000000000000000a leaq _a(%rip),%rcx
0000000000000011 nopl tVi()(%rax)
0000000000000018 nopl tVi()(%rax,%rax)
0000000000000020 movss (%rcx,%rax),%xmm0
0000000000000025 cvttss2si %xmm0,%edx
0000000000000029 movaps %xmm0,%xmm1
000000000000002c xorps %xmm2,%xmm1
000000000000002f testl %edx,%edx
0000000000000031 je 0x00000036
0000000000000033 movaps %xmm1,%xmm0
0000000000000036 leaq _b(%rip),%rdx
000000000000003d movss %xmm0,(%rdx,%rax)
0000000000000042 addq $0x04,%rax
0000000000000046 cmpq $0x00001000,%eax
000000000000004c jne 0x00000020
000000000000004e repz/ret
tVf():
0000000000000050 movaps 0x00000059(%rip),%xmm4
0000000000000057 xorl %eax,%eax
0000000000000059 xorps %xmm3,%xmm3
000000000000005c leaq _a(%rip),%rcx
0000000000000063 leaq _b(%rip),%rdx
000000000000006a nopw tVi()(%rax,%rax)
0000000000000070 movaps (%rcx,%rax),%xmm2
0000000000000074 cvttps2dq %xmm2,%xmm0
0000000000000078 cvtdq2ps %xmm0,%xmm0
000000000000007b cmpps $0x4,%xmm3,%xmm0
000000000000007f movaps %xmm2,%xmm1
0000000000000082 xorps %xmm4,%xmm1
0000000000000085 andps %xmm0,%xmm1
0000000000000088 andnps %xmm2,%xmm0
000000000000008b orps %xmm1,%xmm0
000000000000008e movaps %xmm0,(%rdx,%rax)
0000000000000092 addq $0x10,%rax
0000000000000096 cmpq $0x00001000,%eax
000000000000009c jne 0x00000070
000000000000009e repz/ret

I'm using

g++ -v
Using built-in specs.
COLLECT_GCC=g++
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc/x86_64-apple-darwin10.7.0/4.7.0/lto-wrapper
Target: x86_64-apple-darwin10.7.0
Configured with: ./configure --enable-languages=c,c++,fortran --enable-lto --with-build-config=bootstrap-lto CFLAGS='-O2 -ftree-vectorize -fPIC' CXXFLAGS='-O2 -fPIC -ftree-vectorize -fvisibility-inlines-hidden'
Thread model: posix
gcc version 4.7.0 20110528 (experimental) (GCC)