On Fri, Jan 23, 2026 at 10:42 AM Hongyu Wang <[email protected]> wrote:
>
> CC maintainers.
>
> Hongyu Wang <[email protected]> 于2026年1月22日周四 11:33写道:
> >
> > Hi,
> >
> > For a _mm256_broadcastsi128_si256 call with -mapxf enabled, GCC may emit
> > an illegal vbroadcasti128 that uses an egpr under high register pressure.
> >
> > See https://godbolt.org/z/jbWTPfn1f
> >
> > Restrict the avx2 alternative of the pattern to the "jm" constraint and
> > mark its address as gpr16.
> >
> > Bootstrapped & regtested on x86_64-pc-linux-gnu. Ok for trunk and
> > backport down to gcc14?
Ok.
> >
> > gcc/ChangeLog:
> >
> > * config/i386/sse.md (avx2_vbroadcasti128_<mode>): Constrain
> > alternative 0 with jm and add gpr16 addr attr to avoid egpr usage.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/apx-broadcast.c: New test.
> > ---
> > gcc/config/i386/sse.md | 5 ++--
> > gcc/testsuite/gcc.target/i386/apx-broadcast.c | 24 +++++++++++++++++++
> > 2 files changed, 27 insertions(+), 2 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/i386/apx-broadcast.c
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 7bab6eb3b97..4bfbd3c59a9 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -28332,17 +28332,18 @@ (define_insn "*vec_dupv2di"
> > (define_insn "avx2_vbroadcasti128_<mode>"
> > [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
> > (vec_concat:VI_256
> > - (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
> > + (match_operand:<ssehalfvecmode> 1 "memory_operand" "jm,m,m")
> > (match_dup 1)))]
> > "TARGET_AVX2"
> > "@
> > vbroadcasti128\t{%1, %0|%0, %1}
> > vbroadcast<i128vldq>\t{%1, %0|%0, %1}
> > vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
> > - [(set_attr "isa" "*,avx512dq,avx512vl")
> > + [(set_attr "isa" "noavx512vl,avx512dq,avx512vl")
> > (set_attr "type" "ssemov")
> > (set_attr "prefix_extra" "1")
> > (set_attr "prefix" "vex,evex,evex")
> > + (set_attr "addr" "gpr16,*,*")
> > (set_attr "mode" "OI")])
> >
> > ;; optimize vlddqu + vinserti128 to vbroadcasti128, the former will use
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-broadcast.c
> > b/gcc/testsuite/gcc.target/i386/apx-broadcast.c
> > new file mode 100644
> > index 00000000000..7678dcd4550
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/apx-broadcast.c
> > @@ -0,0 +1,24 @@
> > +/* { dg-do assemble { target { apxf && { ! ia32 } } } } */
> > +/* { dg-options "-mavx512vl -mapxf -O2" } */
> > +
> > +#include <stdint.h>
> > +#include <immintrin.h>
> > +
> > +void broadcast_avx2(int *sx, __m256i *coeff, __m128i *temp) {
> > +
> > + __m256i semp[8];
> > +
> > +
> > + for (int i = 0; i < 8; i++)
> > + {
> > + asm volatile ("" : : : "r8", "r9", "r10", "r11", "r12", "r13",
> > + "r14", "r15", "rax", "rcx", "rsi", "rdi", "rdx");
> > + register volatile uint64_t sm asm ("%r16") = i;
> > + semp[i] = _mm256_broadcastsi128_si256(temp[sm]);
> > + }
> > +
> > + coeff[0] = _mm256_unpacklo_epi64(semp[0], semp[1]);
> > + coeff[1] = _mm256_unpackhi_epi64(semp[2], semp[3]);
> > + coeff[2] = _mm256_unpacklo_epi64(semp[4], semp[5]);
> > + coeff[3] = _mm256_unpackhi_epi64(semp[6], semp[7]);
> > +}
> > --
> > 2.31.1
> >
--
BR,
Hongtao