Target: x86_64-unknown-linux-gnu
Configured with: ../gcc-trunk/configure --prefix=/home/jeff/gnu/TR
--program-suffix=TR --enable-languages=c,c++
Thread model: posix
gcc version 4.6.0 20100608 (experimental) (GCC) 

While running some tests against SSE4.2 instructions, I noticed that the
__builtin_ia32_pcmpestri128 builtin generates the correct pcmpestri
instruction followed immediately by an extraneous pcmpestrm instruction.  The
extra instruction goes away when compiling with any optimization level.

A very simple test program requiring no pre-processing:

BEGIN SAMPLE: sseTest2.c
/* 16-byte vector of long long, marked __may_alias__.  Not referenced by
   main() in this sample. */
typedef long long __m128i __attribute__ ((__vector_size__ (16),
__may_alias__));
/* 16-byte vector of char; the operand type used for both vectors passed to
   the builtin in main(). */
typedef char __v16qi __attribute__ ((__vector_size__ (16)));

/* Minimal reproducer: makes a single call to __builtin_ia32_pcmpestri128 and
   returns its result as the process exit status. */
int
main()
{
        /* First vector operand: only the first element ('K') is given
           explicitly. */
        __v16qi c = (__v16qi){ 'K' };
        /* Second vector operand: 13 elements, 'A' through 'M', are given
           explicitly. */
        __v16qi str1 =
(__v16qi){'A','B','C','D','E','F','G','H','I','J','K','L','M'};

        /* The integer arguments 1 and 13 match the number of explicitly
           initialized elements in c and str1; the final argument is 0.
           This is the call whose generated code is under discussion. */
        int v = __builtin_ia32_pcmpestri128(c, 1, str1, 13, 0);

        return v;
}
END SAMPLE

Building with:
~/gnu/TR/bin/gccTR -S -msse4.2 sseTest2.c -o sseTest2.nonoptimized.s

shows the extra instruction (pcmpestrm, immediately after pcmpestri):
    movdqa  .LC0(%rip), %xmm0
    movdqa  %xmm0, -32(%rbp)
    movdqa  .LC1(%rip), %xmm0
    movdqa  %xmm0, -48(%rbp)
    movdqa  -48(%rbp), %xmm1
    movdqa  -32(%rbp), %xmm0
    movl    $1, %eax
    movl    $13, %edx
    pcmpestri   $0, %xmm1, %xmm0
    pcmpestrm   $0, %xmm1, %xmm0
    movl    %ecx, -4(%rbp)
    movl    -4(%rbp), %eax
    leave


Building with:
~/gnu/TR/bin/gccTR -S -O -msse4.2 sseTest2.c -o sseTest2.optimized.s

shows no extra instruction (only pcmpestri is emitted):
    movdqa  .LC0(%rip), %xmm0
    movl    $1, %eax
    movl    $13, %edx
    pcmpestri   $0, .LC1(%rip), %xmm0
    movl    %ecx, %eax
    ret


-- 
           Summary: __builtin_ia32_pcmpestri128 generates an additional
                    pcmpestrm operation
           Product: gcc
           Version: 4.6.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: jeff_wegher at yahoo dot com
 GCC build triplet: 4.6.0
  GCC host triplet: x86_64-unknown-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44472

Reply via email to