--- Short description Use of MMX intrinsics generates useless code: MMX registers seem to be written back to the stack systematically, even when this is not needed. Maybe it's related to an already-reported bug about MMX.
--- GCC version (4.2.1, prebuilt version for mingw) The bug was seen at least with 4.0.x and 4.1.x, under Linux as well. I don't remember the exact build params, but as far as I remember this prevented me from using MMX intrinsics. The -march=pentium4 option is irrelevant (command line below); it is used only to enable MMX/SSE. > g++ -v Using built-in specs. Target: mingw32 Configured with: ../gcc-4.2.1-2-src/configure --with-gcc --enable-libgomp --host=mingw32 --build=mingw32 --target=mingw32 --program-suffix=-dw2 --with-arch=i486 --with-tune=generic --disable-werror --prefix=/mingw --with-local-prefix=/mingw --enable-threads --disable-nls --enable-languages=c,c++,fortran,objc,obj-c++,ada --disable-win32-registry --disable-sjlj-exceptions --enable-libstdcxx-debug --enable-cxx-flags=-fno-function-sections -fno-data-sections --enable-version-specific-runtime-libs --disable-bootstrap Thread model: win32 gcc version 4.2.1-dw2 (mingw32-2) --- Source code --- > cat bug.cc #if defined (SSE) #include <emmintrin.h> #elif defined (MMX) #include <mmintrin.h> #endif struct X { #if defined (SSE) __m128i data[2]; #elif defined (MMX) __m64 data[2]; #else int data[2]; #endif }; void foo (X& a, const X& b) { for (int k = 0; k < 2; ++k) { #if defined (SSE) a.data[k] = _mm_xor_si128 (a.data[k], b.data[k]); #elif defined (MMX) a.data[k] = _mm_xor_si64 (a.data[k], b.data[k]); #else a.data[k] ^= b.data[k]; #endif } } --- Assembly (SSE, OK) > g++ -S -O3 bug.cc -DSSE -march=pentium4 -fomit-frame-pointer .globl __Z3fooR1XRKS_ .def __Z3fooR1XRKS_; .scl 2; .type 32; .endef __Z3fooR1XRKS_: LFB472: movl 4(%esp), %eax movl 8(%esp), %edx movdqa (%eax), %xmm0 pxor (%edx), %xmm0 movdqa %xmm0, (%eax) movdqa 16(%eax), %xmm0 pxor 16(%edx), %xmm0 movdqa %xmm0, 16(%eax) ret LFE472: --- Assembly (MMX, BAD) > g++ -S -O3 bug.cc -DMMX -march=pentium4 -fomit-frame-pointer .globl __Z3fooR1XRKS_ .def __Z3fooR1XRKS_; .scl 2; .type 32; .endef __Z3fooR1XRKS_: LFB124: subl $20, %esp <--- Useless LCFI0: movl 24(%esp), %eax movl 
28(%esp), %edx movq (%eax), %mm0 movq %mm0, 8(%esp) <--- Useless pxor (%edx), %mm0 movq %mm0, (%eax) movq 8(%eax), %mm0 movq %mm0, (%esp) <--- Useless pxor 8(%edx), %mm0 movq %mm0, 8(%eax) addl $20, %esp <--- Useless ret LFE124: -- Summary: MMX bad optimization with intrinsics Product: gcc Version: 4.2.1 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: etjq78kl at free dot fr http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35142