aba3b9d3a48a0703fd565f7c5f0caf604f59970b is the first bad commit
commit aba3b9d3a48a0703fd565f7c5f0caf604f59970b
Author: H.J. Lu <hjl.to...@gmail.com>
Date:   Fri May 9 07:17:07 2025 +0800

    x86: Extend the remove_redundant_vector pass

This commit, which removed non all 0s/1s redundant vector loads, caused
performance regressions in SPEC CPU 2017 519.lbm_r and SPEC CPU 2006
470.lbm on AMD znverN processors.  Add a tuning option,
X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD, to keep non all 0s/1s redundant
vector loads on AMD znverN processors.

gcc/

PR target/120941
* config/i386/i386-features.cc (ix86_broadcast_inner): Keep
non all 0s/1s redundant vector loads if asked.
* config/i386/x86-tune.def (X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD):
New tuning.

gcc/testsuite/

PR target/120941
* gcc.target/i386/pr120941-1a.c: New test.
* gcc.target/i386/pr120941-1b.c: Likewise.
* gcc.target/i386/pr120941-1c.c: Likewise.
* gcc.target/i386/pr120941-1d.c: Likewise.

OK for master?

Thanks.

-- 
H.J.
From 27ca9842b54b9e3f585e23abe5fa6b21aa043c73 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Sat, 5 Jul 2025 04:12:47 +0800
Subject: [PATCH] x86: Keep non all 0s/1s redundant vector loads on AMD znverN

aba3b9d3a48a0703fd565f7c5f0caf604f59970b is the first bad commit
commit aba3b9d3a48a0703fd565f7c5f0caf604f59970b
Author: H.J. Lu <hjl.to...@gmail.com>
Date:   Fri May 9 07:17:07 2025 +0800

    x86: Extend the remove_redundant_vector pass

This commit, which removed non all 0s/1s redundant vector loads, caused
performance regressions in SPEC CPU 2017 519.lbm_r and SPEC CPU 2006
470.lbm on AMD znverN processors.  Add a tuning option,
X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD, to keep non all 0s/1s redundant
vector loads on AMD znverN processors.

gcc/

	PR target/120941
	* config/i386/i386-features.cc (ix86_broadcast_inner): Keep
	non all 0s/1s redundant vector loads if asked.
	* config/i386/x86-tune.def (X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD):
	New tuning.

gcc/testsuite/

	PR target/120941
	* gcc.target/i386/pr120941-1a.c: New test.
	* gcc.target/i386/pr120941-1b.c: Likewise.
	* gcc.target/i386/pr120941-1c.c: Likewise.
	* gcc.target/i386/pr120941-1d.c: Likewise.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386-features.cc            |  4 ++++
 gcc/config/i386/x86-tune.def                |  4 ++++
 gcc/testsuite/gcc.target/i386/pr120941-1a.c | 19 +++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr120941-1b.c |  5 +++++
 gcc/testsuite/gcc.target/i386/pr120941-1c.c |  5 +++++
 gcc/testsuite/gcc.target/i386/pr120941-1d.c |  5 +++++
 6 files changed, 42 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120941-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120941-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120941-1c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120941-1d.c

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 054f8d5ddc8..574eaf2e4d2 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3552,6 +3552,10 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
       return constm1_rtx;
     }
 
+  /* Skip if non all 0s/1s redundant vector loads should be kept.  */
+  if (ix86_tune_features[X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD])
+    return nullptr;
+
   mode = GET_MODE (op);
   int nunits = GET_MODE_NUNITS (mode);
   if (nunits < 2)
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 91cdca7fbfc..d0b1da007f9 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -639,6 +639,10 @@ DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces",
 DEF_TUNE (X86_TUNE_AVX512_TWO_EPILOGUES, "avx512_two_epilogues",
 	  m_ZNVER4 | m_ZNVER5)
 
+/* X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD: Keep redundant vector loads.  */
+DEF_TUNE (X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD, "keep_redundant_vector_load",
+	  m_ZNVER)
+
 /*****************************************************************************/
 /*****************************************************************************/
 /* Historical relics: tuning flags that helps a specific old CPU designs     */
diff --git a/gcc/testsuite/gcc.target/i386/pr120941-1a.c b/gcc/testsuite/gcc.target/i386/pr120941-1a.c
new file mode 100644
index 00000000000..daced44b4b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120941-1a.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2 -mtune=znver5" } */
+/* { dg-final { scan-assembler-times "vpbroadcastb" 2 } } */
+
+#include <immintrin.h>
+
+extern __m512i sinkz;
+extern __m256i sinky;
+extern char f;
+
+void
+foo(char c, int x)
+{
+  c += f;
+  sinkz = _mm512_set1_epi8(c);
+  if (x == 2)
+    f += 3;
+  sinky = _mm256_set1_epi8(c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr120941-1b.c b/gcc/testsuite/gcc.target/i386/pr120941-1b.c
new file mode 100644
index 00000000000..a00ba5eb8ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120941-1b.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2 -mtune=znver4" } */
+/* { dg-final { scan-assembler-times "vpbroadcastb" 2 } } */
+
+#include "pr120941-1a.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr120941-1c.c b/gcc/testsuite/gcc.target/i386/pr120941-1c.c
new file mode 100644
index 00000000000..2b10ecdee82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120941-1c.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2 -mtune=znver3" } */
+/* { dg-final { scan-assembler-times "vpbroadcastb" 2 } } */
+
+#include "pr120941-1a.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr120941-1d.c b/gcc/testsuite/gcc.target/i386/pr120941-1d.c
new file mode 100644
index 00000000000..39a5714d99d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120941-1d.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2 -mtune=znver2" } */
+/* { dg-final { scan-assembler-times "vpbroadcastb" 2 } } */
+
+#include "pr120941-1a.c"
-- 
2.50.0

Reply via email to