--- Short description

Use of MMX intrinsics generates useless code: MMX registers seem to be
systematically spilled to the stack, even when this is not needed. Maybe it's
related to an already-reported bug about MMX.

--- GCC version (4.2.1, prebuild version for mingw)

The bug was also seen with at least 4.0.x and 4.1.x, under Linux as well. I
don't remember the exact build parameters, but as far as I remember this
prevented me from using MMX intrinsics.

The -march=pentium4 option (see the command lines below) is irrelevant to the
bug; it is used only to enable MMX/SSE.

> g++ -v
Using built-in specs.
Target: mingw32
Configured with: ../gcc-4.2.1-2-src/configure --with-gcc --enable-libgomp
--host=mingw32 --build=mingw32 --target=mingw32 --program-suffix=-dw2
--with-arch=i486 --with-tune=generic --disable-werror --prefix=/mingw
--with-local-prefix=/mingw --enable-threads --disable-nls
--enable-languages=c,c++,fortran,objc,obj-c++,ada --disable-win32-registry
--disable-sjlj-exceptions --enable-libstdcxx-debug
--enable-cxx-flags=-fno-function-sections -fno-data-sections
--enable-version-specific-runtime-libs --disable-bootstrap
Thread model: win32
gcc version 4.2.1-dw2 (mingw32-2)

--- Source code ---

> cat bug.cc

#if defined (SSE)
#include <emmintrin.h>
#elif defined (MMX)
#include <mmintrin.h>
#endif

// Test-case payload: a two-element array whose element type is chosen at
// compile time by the SSE / MMX macros (passed as -DSSE or -DMMX on the
// command line).
struct X {
#if defined (SSE)
  // SSE build: elements are __m128i.
  __m128i data[2];
#elif defined (MMX)
  // MMX build: elements are __m64.
  __m64   data[2];
#else
  // Neither macro defined: plain integer fallback.
  int     data[2];
#endif
};

// XORs b into a element by element, in place: for k = 0 and 1, a.data[k]
// becomes a.data[k] XOR b.data[k]. The XOR primitive follows the element
// type of X::data: _mm_xor_si128 with -DSSE, _mm_xor_si64 with -DMMX, and
// the built-in ^= operator otherwise.
//
//   a  destination and first operand; modified.
//   b  second operand; read only.
void foo (X& a, const X& b)
{
  for (int k = 0; k < 2; ++k) {
#if defined (SSE)
    // XOR of two __m128i values via the SSE2 intrinsic.
    a.data[k] = _mm_xor_si128 (a.data[k], b.data[k]);
#elif defined (MMX)
    // XOR of two __m64 values via the MMX intrinsic.
    a.data[k] = _mm_xor_si64 (a.data[k], b.data[k]);
#else
    // Plain integer XOR-assign.
    a.data[k] ^= b.data[k];
#endif
  }
}

--- Assembly (SSE, OK)

> g++ -S -O3 bug.cc -DSSE -march=pentium4 -fomit-frame-pointer

# Compiler output for foo (X& a, const X& b) in the -DSSE build (AT&T syntax).
# The two loop iterations appear as straight-line code: each 16-byte element
# is loaded from a, XORed with the matching element of b, and stored back to
# a. The stack pointer is never adjusted and nothing is stored to the stack.
.globl __Z3fooR1XRKS_
        .def    __Z3fooR1XRKS_; .scl    2;      .type   32;     .endef
__Z3fooR1XRKS_:
LFB472:
        # eax = address of a (base of the loads and stores below).
        movl    4(%esp), %eax
        # edx = address of b (only read, as the pxor memory operand).
        movl    8(%esp), %edx
        # Element 0: a.data[0] ^= b.data[0].
        movdqa  (%eax), %xmm0
        pxor    (%edx), %xmm0
        movdqa  %xmm0, (%eax)
        # Element 1 (byte offset 16): a.data[1] ^= b.data[1].
        movdqa  16(%eax), %xmm0
        pxor    16(%edx), %xmm0
        movdqa  %xmm0, 16(%eax)
        ret
LFE472:

--- Assembly (MMX, BAD)

> g++ -S -O3 bug.cc -DMMX -march=pentium4 -fomit-frame-pointer

# Compiler output for foo (X& a, const X& b) in the -DMMX build (AT&T syntax).
# Same load / pxor / store sequence as the SSE listing, on 8-byte elements,
# plus a 20-byte stack adjustment and two movq stores to the stack. Neither
# stack slot is read back anywhere in this listing; those are the lines the
# reporter marks as useless.
.globl __Z3fooR1XRKS_
        .def    __Z3fooR1XRKS_; .scl    2;      .type   32;     .endef
__Z3fooR1XRKS_:
LFB124:
        # Reserve 20 bytes of stack; only the two marked movq stores use it.
        subl    $20, %esp               <--- Useless
LCFI0:
        # Argument offsets are 4 and 8 plus the 20 bytes just reserved.
        # eax = address of a (base of the loads and stores below).
        movl    24(%esp), %eax
        # edx = address of b (only read, as the pxor memory operand).
        movl    28(%esp), %edx
        # Element 0: a.data[0] ^= b.data[0].
        movq    (%eax), %mm0
        # Copy of the original a.data[0] written to the stack, never reloaded.
        movq    %mm0, 8(%esp)           <--- Useless
        pxor    (%edx), %mm0
        movq    %mm0, (%eax)
        # Element 1 (byte offset 8): a.data[1] ^= b.data[1].
        movq    8(%eax), %mm0
        # Copy of the original a.data[1] written to the stack, never reloaded.
        movq    %mm0, (%esp)            <--- Useless
        pxor    8(%edx), %mm0
        movq    %mm0, 8(%eax)
        # Release the 20 bytes reserved at entry.
        addl    $20, %esp               <--- Useless
        ret
LFE124:


-- 
           Summary: MMX bad optimization with intrinsics
           Product: gcc
           Version: 4.2.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: etjq78kl at free dot fr


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35142

Reply via email to