Hello,

This patch uses the &, | and ^ operators for 128-bit integer vectors. I am doing the operations in type __v2du because __builtin_ia32_pand128 apparently takes __v2di arguments, but using __v4su or any other 128-bit integer vector type should be equivalent. Even __int128 would in principle be OK, but since it is not usually stored in a vector register, it seems more likely to generate unexpected code (and we don't have __int256, so it would be inconsistent with the other sizes).

Regtested with patch 3/n. Ok for the branch?

After that, I will post one last patch to generalize &, | and ^ to the 256-bit and 512-bit sizes. I think that will be enough for gcc-5, and we should then discuss merging. The remaining operations ("< > == abs min max") can wait until gcc-6, possibly after getting some feedback about + - * / & | ^.



2014-11-10  Marc Glisse  <marc.gli...@inria.fr>

        * config/i386/emmintrin.h (_mm_and_si128, _mm_or_si128, _mm_xor_si128):
        Use vector extensions instead of builtins.

--
Marc Glisse
Index: config/i386/emmintrin.h
===================================================================
--- config/i386/emmintrin.h     (revision 217249)
+++ config/i386/emmintrin.h     (working copy)
@@ -1244,39 +1244,39 @@ _mm_srl_epi32 (__m128i __A, __m128i __B)
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_srl_epi64 (__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_and_si128 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
+  return (__m128i) ((__v2du)__A & (__v2du)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_andnot_si128 (__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_or_si128 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
+  return (__m128i) ((__v2du)__A | (__v2du)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_xor_si128 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
+  return (__m128i) ((__v2du)__A ^ (__v2du)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_cmpeq_epi16 (__m128i __A, __m128i __B)

Reply via email to