Hello,
This patch implements <, > and == for integer vectors of size 128 with
vector extensions instead of builtins. I was surprised not to find
_mm_cmplt_epi64 anywhere. Note that I can do the same for size 256, but
not for size 512: there is no corresponding intrinsic, only _mask
versions that return a mask.
For gcc-5, we should stop either after 5/n or after 7/n (the AVX2 version
of 6/n).
Regression-tested together with 5/n.
2014-11-10 Marc Glisse
* config/i386/emmintrin.h (_mm_cmpeq_epi8, _mm_cmpeq_epi16,
_mm_cmpeq_epi32, _mm_cmplt_epi8, _mm_cmplt_epi16, _mm_cmplt_epi32,
_mm_cmpgt_epi8, _mm_cmpgt_epi16, _mm_cmpgt_epi32): Use vector
extensions instead of builtins.
* config/i386/smmintrin.h (_mm_cmpeq_epi64, _mm_cmpgt_epi64):
Likewise.
--
Marc Glisse
Index: emmintrin.h
===
--- emmintrin.h (revision 217263)
+++ emmintrin.h (working copy)
@@ -1268,69 +1268,69 @@ _mm_or_si128 (__m128i __A, __m128i __B)
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_xor_si128 (__m128i __A, __m128i __B)
{
return (__m128i) ((__v2du)__A ^ (__v2du)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
+ return (__m128i) ((__v16qi)__A == (__v16qi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
+ return (__m128i) ((__v8hi)__A == (__v8hi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
+ return (__m128i) ((__v4si)__A == (__v4si)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmplt_epi8 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
+ return (__m128i) ((__v16qi)__A < (__v16qi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmplt_epi16 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
+ return (__m128i) ((__v8hi)__A < (__v8hi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmplt_epi32 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
+ return (__m128i) ((__v4si)__A < (__v4si)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
+ return (__m128i) ((__v16qi)__A > (__v16qi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
+ return (__m128i) ((__v8hi)__A > (__v8hi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
+ return (__m128i) ((__v4si)__A > (__v4si)__B);
}
#ifdef __OPTIMIZE__
extern __inline int __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
Index: smmintrin.h
===
--- smmintrin.h (revision 217259)
+++ smmintrin.h (working copy)
@@ -260,21 +260,21 @@ _mm_dp_pd (__m128d __X, __m128d __Y, con
#define _mm_dp_pd(X, Y, M) \
((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X),
\
(__v2df)(__m128d)(Y), (int)(M)))
#endif
/* Packed integer 64-bit comparison, zeroing or filling with ones
corresponding parts of result. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
{
- return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y);
+ return (__m128i) ((__v2di)__X == (__v2di)__Y);
}
/* Min/max packed integer instructions. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_min_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
}
@@ -788