CC maintainers.
Hongyu Wang <[email protected]> 于2026年1月22日周四 11:33写道: > > Hi, > > For a _mm256_broadcastsi128_si256 call with -mapxf enabled, GCC may produce > an illegal vbroadcasti128 with an egpr under high register pressure. > > See https://godbolt.org/z/jbWTPfn1f > > Restrict the pattern to use "jm" and gpr16 for the avx2 alternative. > > Bootstrapped & regtested on x86_64-pc-linux-gnu. Ok for trunk and > backport down to gcc14? > > gcc/ChangeLog: > > * config/i386/sse.md (avx2_vbroadcasti128_<mode>): Constrain > alternative 0 with jm and add gpr16 attr to avoid egpr usage. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/apx-broadcast.c: New test. > --- > gcc/config/i386/sse.md | 5 ++-- > gcc/testsuite/gcc.target/i386/apx-broadcast.c | 24 +++++++++++++++++++ > 2 files changed, 27 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/apx-broadcast.c > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 7bab6eb3b97..4bfbd3c59a9 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -28332,17 +28332,18 @@ (define_insn "*vec_dupv2di" > (define_insn "avx2_vbroadcasti128_<mode>" > [(set (match_operand:VI_256 0 "register_operand" "=x,v,v") > (vec_concat:VI_256 > - (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m") > + (match_operand:<ssehalfvecmode> 1 "memory_operand" "jm,m,m") > (match_dup 1)))] > "TARGET_AVX2" > "@ > vbroadcasti128\t{%1, %0|%0, %1} > vbroadcast<i128vldq>\t{%1, %0|%0, %1} > vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}" > - [(set_attr "isa" "*,avx512dq,avx512vl") > + [(set_attr "isa" "noavx512vl,avx512dq,avx512vl") > (set_attr "type" "ssemov") > (set_attr "prefix_extra" "1") > (set_attr "prefix" "vex,evex,evex") > + (set_attr "addr" "gpr16,*,*") > (set_attr "mode" "OI")]) > > ;; optimize vlddqu + vinserti128 to vbroadcasti128, the former will use > diff --git a/gcc/testsuite/gcc.target/i386/apx-broadcast.c > b/gcc/testsuite/gcc.target/i386/apx-broadcast.c > new file mode 100644 > index 
00000000000..7678dcd4550 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/apx-broadcast.c > @@ -0,0 +1,24 @@ > +/* { dg-do assemble { target { apxf && { ! ia32 } } } } */ > +/* { dg-options "-mavx512vl -mapxf -O2" } */ > + > +#include <stdint.h> > +#include <immintrin.h> > + > +void broadcast_avx2(int *sx, __m256i *coeff, __m128i *temp) { > + > + __m256i semp[8]; > + > + > + for (int i = 0; i < 8; i++) > + { > + asm volatile ("" : : : "r8", "r9", "r10", "r11", "r12", "r13", > + "r14", "r15", "rax", "rcx", "rsi", "rdi", "rdx"); > + register volatile uint64_t sm asm ("%r16") = i; > + semp[i] = _mm256_broadcastsi128_si256(temp[sm]); > + } > + > + coeff[0] = _mm256_unpacklo_epi64(semp[0], semp[1]); > + coeff[1] = _mm256_unpackhi_epi64(semp[2], semp[3]); > + coeff[2] = _mm256_unpacklo_epi64(semp[4], semp[5]); > + coeff[3] = _mm256_unpackhi_epi64(semp[6], semp[7]); > +} > -- > 2.31.1 >
