Hi,

When -mapxf is enabled, a call to _mm256_broadcastsi128_si256 may produce
an illegal vbroadcasti128 that uses an EGPR under high register pressure.

See https://godbolt.org/z/jbWTPfn1f

Restrict the AVX2 alternative of the pattern to the "jm" constraint and
set its addr attribute to gpr16.

Bootstrapped & regtested on x86_64-pc-linux-gnu. Ok for trunk and
backport down to gcc14?

gcc/ChangeLog:

        * config/i386/sse.md (avx2_vbroadcasti128_<mode>): Constrain
        alternative 0 with jm and add gpr16 attr to avoid egpr usage.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/apx-broadcast.c: New test.
---
 gcc/config/i386/sse.md                        |  5 ++--
 gcc/testsuite/gcc.target/i386/apx-broadcast.c | 24 +++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-broadcast.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7bab6eb3b97..4bfbd3c59a9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -28332,17 +28332,18 @@ (define_insn "*vec_dupv2di"
 (define_insn "avx2_vbroadcasti128_<mode>"
   [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
        (vec_concat:VI_256
-         (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
+         (match_operand:<ssehalfvecmode> 1 "memory_operand" "jm,m,m")
          (match_dup 1)))]
   "TARGET_AVX2"
   "@
    vbroadcasti128\t{%1, %0|%0, %1}
    vbroadcast<i128vldq>\t{%1, %0|%0, %1}
    vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
-  [(set_attr "isa" "*,avx512dq,avx512vl")
+  [(set_attr "isa" "noavx512vl,avx512dq,avx512vl")
    (set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "vex,evex,evex")
+   (set_attr "addr" "gpr16,*,*")
    (set_attr "mode" "OI")])
 
 ;; optimize vlddqu + vinserti128 to vbroadcasti128, the former will use
diff --git a/gcc/testsuite/gcc.target/i386/apx-broadcast.c b/gcc/testsuite/gcc.target/i386/apx-broadcast.c
new file mode 100644
index 00000000000..7678dcd4550
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-broadcast.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble { target { apxf && { ! ia32 } } } } */
+/* { dg-options "-mavx512vl -mapxf -O2" } */
+
+#include <stdint.h>
+#include <immintrin.h>
+
+void broadcast_avx2(int *sx, __m256i *coeff, __m128i *temp) {
+  
+  __m256i semp[8];
+
+
+  for (int i = 0; i < 8; i++)
+    {
+      asm volatile ("" : : : "r8", "r9", "r10", "r11", "r12", "r13",
+                   "r14", "r15", "rax", "rcx", "rsi", "rdi", "rdx");
+      register volatile uint64_t sm asm ("%r16") = i;
+      semp[i] = _mm256_broadcastsi128_si256(temp[sm]);
+    }
+
+  coeff[0] = _mm256_unpacklo_epi64(semp[0], semp[1]);
+  coeff[1] = _mm256_unpackhi_epi64(semp[2], semp[3]);
+  coeff[2] = _mm256_unpacklo_epi64(semp[4], semp[5]);
+  coeff[3] = _mm256_unpackhi_epi64(semp[6], semp[7]);
+}
-- 
2.31.1

Reply via email to