On Fri, Apr 23, 2021 at 3:18 PM Uros Bizjak <ubiz...@gmail.com> wrote:
>
> On Fri, Apr 23, 2021 at 9:15 AM Hongtao Liu <crazy...@gmail.com> wrote:
> >
> > On Fri, Apr 23, 2021 at 2:50 PM Uros Bizjak <ubiz...@gmail.com> wrote:
> > >
> > > On Fri, Apr 23, 2021 at 8:36 AM Hongtao Liu <crazy...@gmail.com> wrote:
> > > >
> > > > Hi:
> > > >   The patch is a follow-up to
> > > > https://gcc.gnu.org/pipermail/gcc-patches/2021-January/564320.html.
> > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > > >   Ok for trunk?
> > > > gcc/ChangeLog:
> > > >
> > > >         PR target/98911
> > > >         * config/i386/i386-builtin.def (BDESC): Change the icode of
> > > >         the following builtins to CODE_FOR_nothing.
> > > >         * config/i386/i386.c (ix86_gimple_fold_builtin): Fold
> > > >         IX86_BUILTIN_PCMPEQB128, IX86_BUILTIN_PCMPEQW128,
> > > >         IX86_BUILTIN_PCMPEQD128, IX86_BUILTIN_PCMPEQQ,
> > > >         IX86_BUILTIN_PCMPEQB256, IX86_BUILTIN_PCMPEQW256,
> > > >         IX86_BUILTIN_PCMPEQD256, IX86_BUILTIN_PCMPEQQ256,
> > > >         IX86_BUILTIN_PCMPGTB128, IX86_BUILTIN_PCMPGTW128,
> > > >         IX86_BUILTIN_PCMPGTD128, IX86_BUILTIN_PCMPGTQ,
> > > >         IX86_BUILTIN_PCMPGTB256, IX86_BUILTIN_PCMPGTW256,
> > > >         IX86_BUILTIN_PCMPGTD256, IX86_BUILTIN_PCMPGTQ256.
> > > >         * config/i386/sse.md (avx2_eq<mode>3): Deleted.
> > > >         (sse2_eq<mode>3): Ditto.
> > > >         (sse2_gt<mode>3): Rename to ..
> > > >         (*sse2_gt<mode>3): .. this.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >         PR target/98911
> > > >         * gcc.target/i386/pr98911.c: New test.
> > > >         * gcc.target/i386/funcspec-8.c: Remove __builtin_ia32_pcmpgtq
> > > >         since it has been folded.
> > >
> > >
> > > -(define_expand "sse2_eq<mode>3"
> > > -  [(set (match_operand:VI124_128 0 "register_operand")
> > > -    (eq:VI124_128
> > > -      (match_operand:VI124_128 1 "vector_operand")
> > > -      (match_operand:VI124_128 2 "vector_operand")))]
> > > -  "TARGET_SSE2 && !TARGET_XOP "
> > > -  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
> > > -
> > >  (define_expand "sse4_1_eqv2di3"
> > >    [(set (match_operand:V2DI 0 "register_operand")
> > >      (eq:V2DI
> > >
> > > You can also remove sse4_1_eqv2di3 expander.
> >
> > Oh, yes.
> >
> > >
> > > -#ifdef __SSE4_2__
> > > -#error "-msse4.2 should not be set for this test"
> > > -#endif
> > > -
> > > -__m128i sse4_2_pcmpgtq (__m128i a, __m128i b)
> > > __attribute__((__target__("sse4.2")));
> > > -__m128i generic_pcmpgtq (__m128i ab, __m128i b);
> > > -
> > > -__m128i
> > > -sse4_2_pcmpgtq (__m128i a, __m128i b)
> > > -{
> > > -  return __builtin_ia32_pcmpgtq (a, b);
> > > -}
> > > -
> > > -__m128i
> > > -generic_pcmpgtq (__m128i a, __m128i b)
> > > -{
> > > -  return __builtin_ia32_pcmpgtq (a, b);            /* { dg-error
> > > "needs isa option" } */
> > > -}
> > >
> > > Why remove the above? It is testing isa options, it has nothing to do
> > > with improved folding.
> >
> > If the backend does not support the corresponding instruction, the
> > vector operation will be automatically lowered to scalar, so no error
> > will be reported.
>
> I see. It would be nice to change the test to use some other SSE4.2
> builtin (there are plenty of packed compares) and not remove it
> altogether.

Updated patch.

>
> Uros.



-- 
BR,
Hongtao
From 31b3110300b9661b5a7bb5811d487ea35dbab8e9 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao....@intel.com>
Date: Tue, 23 Feb 2021 11:17:40 +0800
Subject: [PATCH] Add folding and remove expanders for x86 *pcmp{eq,gt}*
 builtins [PR target/98911]

gcc/ChangeLog:

	PR target/98911
	* config/i386/i386-builtin.def (BDESC): Change the icode of
	the following builtins to CODE_FOR_nothing.
	* config/i386/i386.c (ix86_gimple_fold_builtin): Fold
	IX86_BUILTIN_PCMPEQB128, IX86_BUILTIN_PCMPEQW128,
	IX86_BUILTIN_PCMPEQD128, IX86_BUILTIN_PCMPEQQ,
	IX86_BUILTIN_PCMPEQB256, IX86_BUILTIN_PCMPEQW256,
	IX86_BUILTIN_PCMPEQD256, IX86_BUILTIN_PCMPEQQ256,
	IX86_BUILTIN_PCMPGTB128, IX86_BUILTIN_PCMPGTW128,
	IX86_BUILTIN_PCMPGTD128, IX86_BUILTIN_PCMPGTQ,
	IX86_BUILTIN_PCMPGTB256, IX86_BUILTIN_PCMPGTW256,
	IX86_BUILTIN_PCMPGTD256, IX86_BUILTIN_PCMPGTQ256.
	* config/i386/sse.md (avx2_eq<mode>3): Deleted.
	(sse2_eq<mode>3): Ditto.
	(sse4_1_eqv2di3): Ditto.
	(sse2_gt<mode>3): Rename to ..
	(*sse2_gt<mode>3): .. this.

gcc/testsuite/ChangeLog:

	PR target/98911
	* gcc.target/i386/pr98911.c: New test.
	* gcc.target/i386/funcspec-8.c: Replace __builtin_ia32_pcmpgtq
	with __builtin_ia32_pcmpistrm128 since the former is now folded.
---
 gcc/config/i386/i386-builtin.def           |  32 +++---
 gcc/config/i386/i386.c                     |  44 ++++++++
 gcc/config/i386/sse.md                     |  26 +----
 gcc/testsuite/gcc.target/i386/funcspec-8.c |  17 +--
 gcc/testsuite/gcc.target/i386/pr98911.c    | 116 +++++++++++++++++++++
 5 files changed, 186 insertions(+), 49 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98911.c

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index e3ed4e1578f..4dbd4f23647 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -773,12 +773,12 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX8
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
 
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI )
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI )
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI )
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI )
 
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
@@ -919,7 +919,7 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__built
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
 
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
@@ -962,7 +962,7 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_pte
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST)
 
 /* SSE4.2 */
-BDESC (OPTION_MASK_ISA_SSE4_2, 0, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI)
+BDESC (OPTION_MASK_ISA_SSE4_2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI)
 BDESC (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, 0, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR)
 BDESC (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, 0, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT)
 BDESC (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, 0, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT)
@@ -1149,14 +1149,14 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb2
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256",  IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI )
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI )
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI )
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI )
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI )
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI )
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI )
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI )
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7c41302c75b..e9d7be3970f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -67,6 +67,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "pass_manager.h"
 #include "target-globals.h"
 #include "gimple-iterator.h"
+#include "gimple-fold.h"
 #include "tree-vectorizer.h"
 #include "shrink-wrap.h"
 #include "builtins.h"
@@ -17866,6 +17867,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   tree decl = NULL_TREE;
   tree arg0, arg1, arg2;
   enum rtx_code rcode;
+  enum tree_code tcode;
   unsigned HOST_WIDE_INT count;
   bool is_vshift;
 
@@ -17947,6 +17949,48 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	}
       break;
 
+    case IX86_BUILTIN_PCMPEQB128:
+    case IX86_BUILTIN_PCMPEQW128:
+    case IX86_BUILTIN_PCMPEQD128:
+    case IX86_BUILTIN_PCMPEQQ:
+    case IX86_BUILTIN_PCMPEQB256:
+    case IX86_BUILTIN_PCMPEQW256:
+    case IX86_BUILTIN_PCMPEQD256:
+    case IX86_BUILTIN_PCMPEQQ256:
+      tcode = EQ_EXPR;
+      goto do_cmp;
+
+    case IX86_BUILTIN_PCMPGTB128:
+    case IX86_BUILTIN_PCMPGTW128:
+    case IX86_BUILTIN_PCMPGTD128:
+    case IX86_BUILTIN_PCMPGTQ:
+    case IX86_BUILTIN_PCMPGTB256:
+    case IX86_BUILTIN_PCMPGTW256:
+    case IX86_BUILTIN_PCMPGTD256:
+    case IX86_BUILTIN_PCMPGTQ256:
+      tcode = GT_EXPR;
+
+    do_cmp:
+      gcc_assert (n_args == 2);
+      arg0 = gimple_call_arg (stmt, 0);
+      arg1 = gimple_call_arg (stmt, 1);
+      {
+	location_t loc = gimple_location (stmt);
+	tree type = TREE_TYPE (arg0);
+	tree zero_vec = build_zero_cst (type);
+	tree minus_one_vec = build_minus_one_cst (type);
+	tree cmp_type = truth_type_for (type);
+	gimple_seq stmts = NULL;
+	tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
+	gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
+	gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
+					 VEC_COND_EXPR, cmp,
+					 minus_one_vec, zero_vec);
+	gimple_set_location (g, loc);
+	gsi_replace (gsi, g, false);
+      }
+      return true;
+
     case IX86_BUILTIN_PSLLD:
     case IX86_BUILTIN_PSLLD128:
     case IX86_BUILTIN_PSLLD128_MASK:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 9d3728d1cb0..ec1c5affd10 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -12875,14 +12875,6 @@ (define_insn "*<code>v16qi3"
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_expand "avx2_eq<mode>3"
-  [(set (match_operand:VI_256 0 "register_operand")
-	(eq:VI_256
-	  (match_operand:VI_256 1 "nonimmediate_operand")
-	  (match_operand:VI_256 2 "nonimmediate_operand")))]
-  "TARGET_AVX2"
-  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
-
 (define_insn "*avx2_eq<mode>3"
   [(set (match_operand:VI_256 0 "register_operand" "=x")
 	(eq:VI_256
@@ -13058,22 +13050,6 @@ (define_insn "*sse2_eq<mode>3"
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
-(define_expand "sse2_eq<mode>3"
-  [(set (match_operand:VI124_128 0 "register_operand")
-	(eq:VI124_128
-	  (match_operand:VI124_128 1 "vector_operand")
-	  (match_operand:VI124_128 2 "vector_operand")))]
-  "TARGET_SSE2 && !TARGET_XOP "
-  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
-
-(define_expand "sse4_1_eqv2di3"
-  [(set (match_operand:V2DI 0 "register_operand")
-	(eq:V2DI
-	  (match_operand:V2DI 1 "vector_operand")
-	  (match_operand:V2DI 2 "vector_operand")))]
-  "TARGET_SSE4_1"
-  "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
-
 (define_insn "sse4_2_gtv2di3"
   [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
 	(gt:V2DI
@@ -13144,7 +13120,7 @@ (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "sse2_gt<mode>3"
+(define_insn "*sse2_gt<mode>3"
   [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
 	(gt:VI124_128
 	  (match_operand:VI124_128 1 "register_operand" "0,x")
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-8.c b/gcc/testsuite/gcc.target/i386/funcspec-8.c
index 8bd95fc2d39..0a6c709003a 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-8.c
+++ b/gcc/testsuite/gcc.target/i386/funcspec-8.c
@@ -8,6 +8,7 @@ typedef float     __m128  __attribute__ ((__vector_size__ (16), __may_alias__));
 typedef double    __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
 typedef int	  __m128w __attribute__ ((__vector_size__ (16), __may_alias__));
 typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef char __m128qi __attribute__ ((__vector_size__ (16), __may_alias__));
 
 #ifdef __SSE3__
 #error "-msse3 should not be set for this test"
@@ -70,19 +71,19 @@ generic_blendvpd  (__m128d a, __m128d b, __m128d c)
 #error "-msse4.2 should not be set for this test"
 #endif
 
-__m128i sse4_2_pcmpgtq (__m128i a, __m128i b) __attribute__((__target__("sse4.2")));
-__m128i generic_pcmpgtq (__m128i ab, __m128i b);
+__m128qi sse4_2_cmpistrm (__m128qi a, __m128qi b) __attribute__((__target__("sse4.2")));
+__m128qi generic_cmpistrm (__m128qi a, __m128qi b);
 
-__m128i
-sse4_2_pcmpgtq (__m128i a, __m128i b)
+__m128qi
+sse4_2_cmpistrm (__m128qi a, __m128qi b)
 {
-  return __builtin_ia32_pcmpgtq (a, b);
+  return  __builtin_ia32_pcmpistrm128 (a, b, 0);
 }
 
-__m128i
-generic_pcmpgtq (__m128i a, __m128i b)
+__m128qi
+generic_cmpistrm (__m128qi a, __m128qi b)
 {
-  return __builtin_ia32_pcmpgtq (a, b);			/* { dg-error "needs isa option" } */
+  return  __builtin_ia32_pcmpistrm128 (a, b, 0);	/* { dg-error "needs isa option" } */
 }
 
 #ifdef __SSE4A__
diff --git a/gcc/testsuite/gcc.target/i386/pr98911.c b/gcc/testsuite/gcc.target/i386/pr98911.c
new file mode 100644
index 00000000000..f08cea04858
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98911.c
@@ -0,0 +1,116 @@
+/* PR target/98911  */
+/* { dg-do compile } */
+/* { dg-options "-O -mavx2" } */
+/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpeqw" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpeqd" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpeqq" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtw" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtd" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtq" 2 } } */
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v32qi __attribute__ ((vector_size (32)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef long long v2di __attribute__ ((vector_size (16)));
+typedef long long v4di __attribute__ ((vector_size (32)));
+
+v16qi
+f1 (v16qi a, v16qi b)
+{
+  return __builtin_ia32_pcmpeqb128 (a, b);
+}
+
+v8hi
+f2 (v8hi a, v8hi b)
+{
+  return __builtin_ia32_pcmpeqw128 (a, b);
+}
+
+v4si
+f3 (v4si a, v4si b)
+{
+  return __builtin_ia32_pcmpeqd128 (a, b);
+}
+
+v2di
+f4 (v2di a, v2di b)
+{
+  return __builtin_ia32_pcmpeqq (a, b);
+}
+
+v16qi
+f5 (v16qi a, v16qi b)
+{
+  return __builtin_ia32_pcmpgtb128 (a, b);
+}
+
+v8hi
+f6 (v8hi a, v8hi b)
+{
+  return __builtin_ia32_pcmpgtw128 (a, b);
+}
+
+v4si
+f7 (v4si a, v4si b)
+{
+  return __builtin_ia32_pcmpgtd128 (a, b);
+}
+
+v2di
+f8 (v2di a, v2di b)
+{
+  return __builtin_ia32_pcmpgtq (a, b);
+}
+
+v32qi
+f9 (v32qi a, v32qi b)
+{
+  return __builtin_ia32_pcmpeqb256 (a, b);
+}
+
+v16hi
+f10 (v16hi a, v16hi b)
+{
+  return __builtin_ia32_pcmpeqw256 (a, b);
+}
+
+v8si
+f11 (v8si a, v8si b)
+{
+  return __builtin_ia32_pcmpeqd256 (a, b);
+}
+
+v4di
+f12 (v4di a, v4di b)
+{
+  return __builtin_ia32_pcmpeqq256 (a, b);
+}
+
+v32qi
+f13 (v32qi a, v32qi b)
+{
+  return __builtin_ia32_pcmpgtb256 (a, b);
+}
+
+v16hi
+f14 (v16hi a, v16hi b)
+{
+  return __builtin_ia32_pcmpgtw256 (a, b);
+}
+
+v8si
+f15 (v8si a, v8si b)
+{
+  return __builtin_ia32_pcmpgtd256 (a, b);
+}
+
+v4di
+f16 (v4di a, v4di b)
+{
+  return __builtin_ia32_pcmpgtq256 (a, b);
+}
-- 
2.18.1

Reply via email to