Re: [PATCH] i386: Handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx
On Thu, May 30, 2024 at 1:52 PM Hu, Lin1 wrote: > > Hi, all > > This patch aims to extend __builtin_ia32_cmp[p|s][s|d] from avx to > sse/sse2/avx, where its immediate is in range of [0, 7]. > > Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk? Ok. > > BRs, > Lin > > gcc/ChangeLog: > > * config/i386/avxintrin.h: Move cmp[p|s][s|d] to [e|x]mmintrin.h, > and move macros to xmmintrin.h > * config/i386/emmintrin.h: Add cmp[p|s]d intrins. > * config/i386/i386-builtin.def: Modify __builtin_ia32_cmp[p|s][s|d]. > * config/i386/i386-expand.cc > (ix86_expand_args_builtin): Raise error when imm is in range of > [8, 32] without avx. > * config/i386/sse.md (avx_cmp3): Modify define_insn. > (avx_vmcmp3): Ditto. > * config/i386/xmmintrin.h (_CMP_EQ_OQ): New macro for sse/sse2. > (_CMP_LT_OS): Ditto > (_CMP_LE_OS): Ditto > (_CMP_UNORD_Q): Ditto > (_CMP_NEQ_UQ): Ditto > (_CMP_NLT_US): Ditto > (_CMP_NLE_US): Ditto > (_CMP_ORD_Q): Ditto > (_mm_cmp_ps): Move intrin from avxintrin.h to xmmintrin.h > (_mm_cmp_ss): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/sse-cmp-1.c: New test. > * gcc.target/i386/sse-cmp-2.c: Ditto. > * gcc.target/i386/sse-cmp-error.c: Ditto. 
> --- > gcc/config/i386/avxintrin.h | 56 --- > gcc/config/i386/emmintrin.h | 22 + > gcc/config/i386/i386-builtin.def | 10 +- > gcc/config/i386/i386-expand.cc| 6 ++ > gcc/config/i386/predicates.md | 5 + > gcc/config/i386/sse.md| 42 > gcc/config/i386/xmmintrin.h | 41 > gcc/testsuite/gcc.target/i386/sse-cmp-1.c | 20 > gcc/testsuite/gcc.target/i386/sse-cmp-2.c | 96 +++ > gcc/testsuite/gcc.target/i386/sse-cmp-error.c | 16 > 10 files changed, 236 insertions(+), 78 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-error.c > > diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h > index 80214540888..ec9b9905b5f 100644 > --- a/gcc/config/i386/avxintrin.h > +++ b/gcc/config/i386/avxintrin.h > @@ -72,22 +72,6 @@ typedef double __m256d_u __attribute__ ((__vector_size__ > (32), > > /* Compare predicates for scalar and packed compare intrinsics. 
*/ > > -/* Equal (ordered, non-signaling) */ > -#define _CMP_EQ_OQ 0x00 > -/* Less-than (ordered, signaling) */ > -#define _CMP_LT_OS 0x01 > -/* Less-than-or-equal (ordered, signaling) */ > -#define _CMP_LE_OS 0x02 > -/* Unordered (non-signaling) */ > -#define _CMP_UNORD_Q 0x03 > -/* Not-equal (unordered, non-signaling) */ > -#define _CMP_NEQ_UQ0x04 > -/* Not-less-than (unordered, signaling) */ > -#define _CMP_NLT_US0x05 > -/* Not-less-than-or-equal (unordered, signaling) */ > -#define _CMP_NLE_US0x06 > -/* Ordered (nonsignaling) */ > -#define _CMP_ORD_Q 0x07 > /* Equal (unordered, non-signaling) */ > #define _CMP_EQ_UQ 0x08 > /* Not-greater-than-or-equal (unordered, signaling) */ > @@ -381,18 +365,6 @@ _mm256_xor_ps (__m256 __A, __m256 __B) > } > > #ifdef __OPTIMIZE__ > -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > -_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P) > -{ > - return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P); > -} > - > -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > -_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P) > -{ > - return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P); > -} > - > extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > _mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P) > { > @@ -406,27 +378,7 @@ _mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P) >return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y, >__P); > } > - > -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > -_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P) > -{ > - return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P); > -} > - > -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > -_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P) > -{ > - return (__m128) 
__builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P); > -} > #else > -#define _mm_cmp_pd(X, Y, P)\ > - ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \ > - (__v2df)(__m128d)(Y), (int)(P))) > - > -#define
[PATCH] i386: Handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx
Hi, all This patch aims to extend __builtin_ia32_cmp[p|s][s|d] from avx to sse/sse2/avx, where its immediate is in range of [0, 7]. Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk? BRs, Lin gcc/ChangeLog: * config/i386/avxintrin.h: Move cmp[p|s][s|d] to [e|x]mmintrin.h, and move macros to xmmintrin.h * config/i386/emmintrin.h: Add cmp[p|s]d intrins. * config/i386/i386-builtin.def: Modify __builtin_ia32_cmp[p|s][s|d]. * config/i386/i386-expand.cc (ix86_expand_args_builtin): Raise error when imm is in range of [8, 32] without avx. * config/i386/sse.md (avx_cmp3): Modify define_insn. (avx_vmcmp3): Ditto. * config/i386/xmmintrin.h (_CMP_EQ_OQ): New macro for sse/sse2. (_CMP_LT_OS): Ditto (_CMP_LE_OS): Ditto (_CMP_UNORD_Q): Ditto (_CMP_NEQ_UQ): Ditto (_CMP_NLT_US): Ditto (_CMP_NLE_US): Ditto (_CMP_ORD_Q): Ditto (_mm_cmp_ps): Move intrin from avxintrin.h to xmmintrin.h (_mm_cmp_ss): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/sse-cmp-1.c: New test. * gcc.target/i386/sse-cmp-2.c: Ditto. * gcc.target/i386/sse-cmp-error.c: Ditto. 
--- gcc/config/i386/avxintrin.h | 56 --- gcc/config/i386/emmintrin.h | 22 + gcc/config/i386/i386-builtin.def | 10 +- gcc/config/i386/i386-expand.cc| 6 ++ gcc/config/i386/predicates.md | 5 + gcc/config/i386/sse.md| 42 gcc/config/i386/xmmintrin.h | 41 gcc/testsuite/gcc.target/i386/sse-cmp-1.c | 20 gcc/testsuite/gcc.target/i386/sse-cmp-2.c | 96 +++ gcc/testsuite/gcc.target/i386/sse-cmp-error.c | 16 10 files changed, 236 insertions(+), 78 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-2.c create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-error.c diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index 80214540888..ec9b9905b5f 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -72,22 +72,6 @@ typedef double __m256d_u __attribute__ ((__vector_size__ (32), /* Compare predicates for scalar and packed compare intrinsics. */ -/* Equal (ordered, non-signaling) */ -#define _CMP_EQ_OQ 0x00 -/* Less-than (ordered, signaling) */ -#define _CMP_LT_OS 0x01 -/* Less-than-or-equal (ordered, signaling) */ -#define _CMP_LE_OS 0x02 -/* Unordered (non-signaling) */ -#define _CMP_UNORD_Q 0x03 -/* Not-equal (unordered, non-signaling) */ -#define _CMP_NEQ_UQ0x04 -/* Not-less-than (unordered, signaling) */ -#define _CMP_NLT_US0x05 -/* Not-less-than-or-equal (unordered, signaling) */ -#define _CMP_NLE_US0x06 -/* Ordered (nonsignaling) */ -#define _CMP_ORD_Q 0x07 /* Equal (unordered, non-signaling) */ #define _CMP_EQ_UQ 0x08 /* Not-greater-than-or-equal (unordered, signaling) */ @@ -381,18 +365,6 @@ _mm256_xor_ps (__m256 __A, __m256 __B) } #ifdef __OPTIMIZE__ -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P) -{ - return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P); -} - -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) -_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P) -{ - return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P); -} - extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P) { @@ -406,27 +378,7 @@ _mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P) return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y, __P); } - -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P) -{ - return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P); -} - -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P) -{ - return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P); -} #else -#define _mm_cmp_pd(X, Y, P)\ - ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (int)(P))) - -#define _mm_cmp_ps(X, Y, P)\ - ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (int)(P))) - #define _mm256_cmp_pd(X, Y, P) \ ((__m256d)