Hello,
This patch implements the 128-bit integer vector intrinsics _mm_and_si128,
_mm_or_si128 and _mm_xor_si128 with the &, | and ^ operators instead of
builtins. I am doing the operations in type __v2du because
__builtin_ia32_pand128 apparently took __v2di arguments, but using __v4su
or any other integer vector type should be equivalent. Even __int128 would
in principle be OK, but since it is not usually stored in a vector
register, it seems more likely to generate unexpected code (and we don't
have __int256, so it would be inconsistent with other sizes).
Regtested with patch 3/n. Ok for the branch?
After that, I will post a last patch to generalize the &, | and ^ operators
to sizes 256 and 512. I think that will be enough for gcc-5; we should
discuss merging.
"< > == abs min max" can wait until gcc-6, possibly after getting some
feedback about +-*/&|^.
2014-11-10 Marc Glisse <marc.gli...@inria.fr>
* config/i386/emmintrin.h (_mm_and_si128, _mm_or_si128, _mm_xor_si128):
Use vector extensions instead of builtins.
--
Marc Glisse
Index: config/i386/emmintrin.h
===================================================================
--- config/i386/emmintrin.h (revision 217249)
+++ config/i386/emmintrin.h (working copy)
@@ -1244,39 +1244,39 @@ _mm_srl_epi32 (__m128i __A, __m128i __B)
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_srl_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_and_si128 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
+ return (__m128i) ((__v2du)__A & (__v2du)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_andnot_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_or_si128 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
+ return (__m128i) ((__v2du)__A | (__v2du)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_xor_si128 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
+ return (__m128i) ((__v2du)__A ^ (__v2du)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmpeq_epi16 (__m128i __A, __m128i __B)