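PR target/110438 requests that a dependency-breaking PXOR be emitted before
VPTERNLOG when generating an all-ones vector.  This patch implements that
when the insn is optimized for speed.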
From 815779936d0ca213b4c9ec798ed6acf8179fc2e7 Mon Sep 17 00:00:00 2001
From: Yan Simonaytes <simonaytes....@ispras.ru>
Date: Tue, 4 Jul 2023 21:11:04 +0300
Subject: [PATCH] Generating all-ones zmm needs dep-breaking pxor before
 ternlog

	PR target/110438

gcc/ChangeLog:

	* config/i386/i386.cc (standard_sse_constant_opcode): Emit PXOR
	before VPTERNLOG when optimizing for speed.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110438-1.c: New test.
	* gcc.target/i386/pr110438-2.c: New test.
	* gcc.target/i386/pr110438-3.c: New test.
---
 gcc/config/i386/i386.cc                    | 23 +++++++++++++++++++---
 gcc/testsuite/gcc.target/i386/pr110438-1.c | 12 +++++++++++
 gcc/testsuite/gcc.target/i386/pr110438-2.c | 12 +++++++++++
 gcc/testsuite/gcc.target/i386/pr110438-3.c | 12 +++++++++++
 4 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110438-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110438-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110438-3.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 8989985700a..89e0072caa1 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5329,6 +5329,13 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
 	case MODE_V8DF:
 	case MODE_V16SF:
 	  gcc_assert (TARGET_AVX512F);
+	  if (optimize_insn_for_speed_p ())
+	    {
+	      if (TARGET_AVX512VL)
+		output_asm_insn ("vpxor\t%x0, %x0, %x0", operands);
+	      else
+		output_asm_insn ("vpxor\t%g0, %g0, %g0", operands);
+	    }
 	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
 
 	case MODE_OI:
@@ -5344,10 +5351,20 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
 	    return (TARGET_AVX
 		    ? "vpcmpeqd\t%0, %0, %0"
 		    : "pcmpeqd\t%0, %0");
-	  else if (TARGET_AVX512VL)
-	    return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
 	  else
-	    return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
+	    { 
+	      if (optimize_insn_for_speed_p ())
+		{
+		  if (TARGET_AVX512VL)
+		    output_asm_insn ("vpxor\t%x0, %x0, %x0", operands);
+		  else
+		    output_asm_insn ("vpxor\t%g0, %g0, %g0", operands);
+		}
+	      if (TARGET_AVX512VL)
+		return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
+	      else
+		return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
+	    }
 
 	default:
 	  gcc_unreachable ();
diff --git a/gcc/testsuite/gcc.target/i386/pr110438-1.c b/gcc/testsuite/gcc.target/i386/pr110438-1.c
new file mode 100644
index 00000000000..0c5f4470e9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110438-1.c
@@ -0,0 +1,12 @@
+/*  PR target/110438 generating all-ones zmm needs dep-breaking pxor before ternlog */
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+
+typedef int v64 __attribute__((vector_size(64)));
+
+v64 g(void)
+{
+	return (v64){0} - 1;
+}
+
+/* { dg-final { scan-assembler "vpxor\t%z" } }*/
diff --git a/gcc/testsuite/gcc.target/i386/pr110438-2.c b/gcc/testsuite/gcc.target/i386/pr110438-2.c
new file mode 100644
index 00000000000..14770a972e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110438-2.c
@@ -0,0 +1,12 @@
+/*  PR target/110438 generating all-ones zmm needs dep-breaking pxor before ternlog */
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+
+typedef int v64 __attribute__((vector_size(64)));
+
+v64 g(void)
+{
+	return (v64){0} - 1;
+}
+
+/* { dg-final { scan-assembler "vpxor\t%x" } }*/
diff --git a/gcc/testsuite/gcc.target/i386/pr110438-3.c b/gcc/testsuite/gcc.target/i386/pr110438-3.c
new file mode 100644
index 00000000000..fb07997839c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110438-3.c
@@ -0,0 +1,12 @@
+/*  PR target/110438 generating all-ones zmm needs dep-breaking pxor before ternlog */
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -Os" } */
+
+typedef int v64 __attribute__((vector_size(64)));
+
+v64 g(void)
+{
+	return (v64){0} - 1;
+}
+
+/* { dg-final { scan-assembler-not "vpxor" } }*/
-- 
2.34.1

Reply via email to