--- Comment #2 from kretz at kde dot org 2010-03-26 13:13 ---
BTW, I think you should consider this a bug, not an enhancement: if the
shift count is >= 2^32 (i.e. 1ULL << 32), the result of _mm_sll_epi64 differs
from the expected result (all zeros, since psllq clears each 64-bit lane for
any count greater than 63).
Thus the code GCC generates could potentially hide a bug, making the code
behave differently (but correctly) when compiled by a different compiler.
Testcase:
#include <tmmintrin.h>
// Intrinsic version of the testcase: shifts an all-ones vector left by a
// run-time count of 1 << 32 per 64-bit lane. psllq zeroes a lane whenever the
// count exceeds 63, so the expected result is all zeros.
__m128i intrin()
{
    __m128i a = _mm_setzero_si128();
    a = _mm_cmpeq_epi8(a, a); // every byte compares equal to itself: all bits set
    // |a| is 0x01 in every byte; summing 8 bytes per 64-bit half gives
    // 8 0 ... 8 0 ...
    __m128i count = _mm_sad_epu8(_mm_abs_epi8(a), _mm_setzero_si128());
    count = _mm_slli_epi64(count, 29); // each 64-bit lane: 8 << 29 == 1 << 32
    return _mm_sll_epi64(a, count);    // count is taken from the low 64 bits
}
// Hand-written assembly version of the same computation as intrin(): builds a
// shift count of 1 << 32 in each 64-bit lane of xmm2 and applies it to an
// all-ones vector with psllq.
//
// The result is returned through a proper output operand. A local register
// variable that is not an asm operand is not a supported way to read a
// register back, and the compiler treats it as uninitialized.
__m128i assem()
{
    __m128i r;
    asm(
        "pxor %%xmm1,%%xmm1\n\t"   // xmm1 = 0
        "pcmpeqb %0,%0\n\t"        // r = all bits set (independent of old value)
        "pabsb %0,%%xmm2\n\t"      // xmm2 = 0x01 in every byte
        "psadbw %%xmm1,%%xmm2\n\t" // xmm2 = 8 in each 64-bit lane
        "psllq $29,%%xmm2\n\t"     // xmm2 = 0x100000000 in each 64-bit lane
        "psllq %%xmm2,%0\n\t"      // r <<= count from the low 64 bits of xmm2
        : "=x"(r)
        :
        : "xmm1", "xmm2");
    return r;
}
// Compares the intrinsic and the hand-written assembly results.
// Returns 0 when they are identical and -1 when they differ.
int main()
{
    const __m128i a = intrin();
    const __m128i b = assem();
    // _mm_movemask_epi8 packs the top bit of each of the 16 bytes into an int,
    // so a fully equal comparison yields 0xffff.
    return (_mm_movemask_epi8(_mm_cmpeq_epi32(a, b)) == 0xffff) ? 0 : -1;
}
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43514