Re: [PATCH] i386: Handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx

2024-06-12 Thread Hongtao Liu
On Thu, May 30, 2024 at 1:52 PM Hu, Lin1  wrote:
>
> Hi, all
>
> This patch aims to extend __builtin_ia32_cmp[p|s][s|d] from avx to
> sse/sse2/avx, where its immediate is in range of [0, 7].
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?
Ok.
>
> BRs,
> Lin
>
> gcc/ChangeLog:
>
> * config/i386/avxintrin.h: Move cmp[p|s][s|d] to [e|x]mmintrin.h,
> and move macros to xmmintrin.h
> * config/i386/emmintrin.h: Add cmp[p|s]d intrins.
> * config/i386/i386-builtin.def: Modify __builtin_ia32_cmp[p|s][s|d].
> * config/i386/i386-expand.cc
> (ix86_expand_args_builtin): Raise error when imm is in range of
> [8, 32] without avx.
> * config/i386/sse.md (avx_cmp3): Modify define_insn.
> (avx_vmcmp3): Ditto.
> * config/i386/xmmintrin.h (_CMP_EQ_OQ): New macro for sse/sse2.
> (_CMP_LT_OS): Ditto
> (_CMP_LE_OS): Ditto
> (_CMP_UNORD_Q): Ditto
> (_CMP_NEQ_UQ): Ditto
> (_CMP_NLT_US): Ditto
> (_CMP_NLE_US): Ditto
> (_CMP_ORD_Q): Ditto
> (_mm_cmp_ps): Move intrin from avxintrin.h to xmmintrin.h
> (_mm_cmp_ss): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/sse-cmp-1.c: New test.
> * gcc.target/i386/sse-cmp-2.c: Ditto.
> * gcc.target/i386/sse-cmp-error.c: Ditto.
> ---
>  gcc/config/i386/avxintrin.h   | 56 ---
>  gcc/config/i386/emmintrin.h   | 22 +
>  gcc/config/i386/i386-builtin.def  | 10 +-
>  gcc/config/i386/i386-expand.cc|  6 ++
>  gcc/config/i386/predicates.md |  5 +
>  gcc/config/i386/sse.md| 42 
>  gcc/config/i386/xmmintrin.h   | 41 
>  gcc/testsuite/gcc.target/i386/sse-cmp-1.c | 20 
>  gcc/testsuite/gcc.target/i386/sse-cmp-2.c | 96 +++
>  gcc/testsuite/gcc.target/i386/sse-cmp-error.c | 16 
>  10 files changed, 236 insertions(+), 78 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-error.c
>
> diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h
> index 80214540888..ec9b9905b5f 100644
> --- a/gcc/config/i386/avxintrin.h
> +++ b/gcc/config/i386/avxintrin.h
> @@ -72,22 +72,6 @@ typedef double __m256d_u __attribute__ ((__vector_size__ 
> (32),
>
>  /* Compare predicates for scalar and packed compare intrinsics.  */
>
> -/* Equal (ordered, non-signaling)  */
> -#define _CMP_EQ_OQ 0x00
> -/* Less-than (ordered, signaling)  */
> -#define _CMP_LT_OS 0x01
> -/* Less-than-or-equal (ordered, signaling)  */
> -#define _CMP_LE_OS 0x02
> -/* Unordered (non-signaling)  */
> -#define _CMP_UNORD_Q   0x03
> -/* Not-equal (unordered, non-signaling)  */
> -#define _CMP_NEQ_UQ 0x04
> -/* Not-less-than (unordered, signaling)  */
> -#define _CMP_NLT_US 0x05
> -/* Not-less-than-or-equal (unordered, signaling)  */
> -#define _CMP_NLE_US 0x06
> -/* Ordered (nonsignaling)   */
> -#define _CMP_ORD_Q 0x07
>  /* Equal (unordered, non-signaling)  */
>  #define _CMP_EQ_UQ 0x08
>  /* Not-greater-than-or-equal (unordered, signaling)  */
> @@ -381,18 +365,6 @@ _mm256_xor_ps (__m256 __A, __m256 __B)
>  }
>
>  #ifdef __OPTIMIZE__
> -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> -_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
> -{
> -  return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
> -}
> -
> -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> -_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
> -{
> -  return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
> -}
> -
>  extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
>  _mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
>  {
> @@ -406,27 +378,7 @@ _mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
>return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
>__P);
>  }
> -
> -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> -_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
> -{
> -  return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
> -}
> -
> -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> -_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
> -{
> -  return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
> -}
>  #else
> -#define _mm_cmp_pd(X, Y, P)\
> -  ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X),   \
> -  (__v2df)(__m128d)(Y), (int)(P)))
> -
> -#define 

[PATCH] i386: Handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx

2024-05-29 Thread Hu, Lin1
Hi, all

This patch aims to extend __builtin_ia32_cmp[p|s][s|d] from avx to
sse/sse2/avx, where its immediate is in range of [0, 7].

Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

BRs,
Lin

gcc/ChangeLog:

* config/i386/avxintrin.h: Move cmp[p|s][s|d] to [e|x]mmintrin.h,
and move macros to xmmintrin.h
* config/i386/emmintrin.h: Add cmp[p|s]d intrins.
* config/i386/i386-builtin.def: Modify __builtin_ia32_cmp[p|s][s|d].
* config/i386/i386-expand.cc
(ix86_expand_args_builtin): Raise error when imm is in range of
[8, 32] without avx.
* config/i386/sse.md (avx_cmp3): Modify define_insn.
(avx_vmcmp3): Ditto.
* config/i386/xmmintrin.h (_CMP_EQ_OQ): New macro for sse/sse2.
(_CMP_LT_OS): Ditto
(_CMP_LE_OS): Ditto
(_CMP_UNORD_Q): Ditto
(_CMP_NEQ_UQ): Ditto
(_CMP_NLT_US): Ditto
(_CMP_NLE_US): Ditto
(_CMP_ORD_Q): Ditto
(_mm_cmp_ps): Move intrin from avxintrin.h to xmmintrin.h
(_mm_cmp_ss): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/sse-cmp-1.c: New test.
* gcc.target/i386/sse-cmp-2.c: Ditto.
* gcc.target/i386/sse-cmp-error.c: Ditto.
---
 gcc/config/i386/avxintrin.h   | 56 ---
 gcc/config/i386/emmintrin.h   | 22 +
 gcc/config/i386/i386-builtin.def  | 10 +-
 gcc/config/i386/i386-expand.cc|  6 ++
 gcc/config/i386/predicates.md |  5 +
 gcc/config/i386/sse.md| 42 
 gcc/config/i386/xmmintrin.h   | 41 
 gcc/testsuite/gcc.target/i386/sse-cmp-1.c | 20 
 gcc/testsuite/gcc.target/i386/sse-cmp-2.c | 96 +++
 gcc/testsuite/gcc.target/i386/sse-cmp-error.c | 16 
 10 files changed, 236 insertions(+), 78 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-error.c

diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h
index 80214540888..ec9b9905b5f 100644
--- a/gcc/config/i386/avxintrin.h
+++ b/gcc/config/i386/avxintrin.h
@@ -72,22 +72,6 @@ typedef double __m256d_u __attribute__ ((__vector_size__ 
(32),
 
 /* Compare predicates for scalar and packed compare intrinsics.  */
 
-/* Equal (ordered, non-signaling)  */
-#define _CMP_EQ_OQ 0x00
-/* Less-than (ordered, signaling)  */
-#define _CMP_LT_OS 0x01
-/* Less-than-or-equal (ordered, signaling)  */
-#define _CMP_LE_OS 0x02
-/* Unordered (non-signaling)  */
-#define _CMP_UNORD_Q   0x03
-/* Not-equal (unordered, non-signaling)  */
-#define _CMP_NEQ_UQ 0x04
-/* Not-less-than (unordered, signaling)  */
-#define _CMP_NLT_US 0x05
-/* Not-less-than-or-equal (unordered, signaling)  */
-#define _CMP_NLE_US 0x06
-/* Ordered (nonsignaling)   */
-#define _CMP_ORD_Q 0x07
 /* Equal (unordered, non-signaling)  */
 #define _CMP_EQ_UQ 0x08
 /* Not-greater-than-or-equal (unordered, signaling)  */
@@ -381,18 +365,6 @@ _mm256_xor_ps (__m256 __A, __m256 __B)
 }
 
 #ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
-{
-  return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
-{
-  return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
-}
-
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
 {
@@ -406,27 +378,7 @@ _mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
   return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
   __P);
 }
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
-{
-  return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
-{
-  return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
-}
 #else
-#define _mm_cmp_pd(X, Y, P)\
-  ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X),   \
-  (__v2df)(__m128d)(Y), (int)(P)))
-
-#define _mm_cmp_ps(X, Y, P)\
-  ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P)))
-
 #define _mm256_cmp_pd(X, Y, P) \
   ((__m256d)