CC maintainers.

Hongyu Wang <[email protected]> 于2026年1月22日周四 11:33写道:
>
> Hi,
>
> With -mapxf enabled, a call to _mm256_broadcastsi128_si256 may produce an
> illegal vbroadcasti128 with an EGPR under high register pressure.
>
> See https://godbolt.org/z/jbWTPfn1f
>
> Restrict the AVX2 alternative of the pattern to use "jm" and the gpr16 attr.
>
> Bootstrapped & regtested on x86_64-pc-linux-gnu. Ok for trunk and
> backport down to gcc14?
>
> gcc/ChangeLog:
>
>         * config/i386/sse.md (avx2_vbroadcasti128_<mode>): Constrain
>         alternative 0 with jm and add gpr16 attr to avoid egpr usage.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-broadcast.c: New test.
> ---
>  gcc/config/i386/sse.md                        |  5 ++--
>  gcc/testsuite/gcc.target/i386/apx-broadcast.c | 24 +++++++++++++++++++
>  2 files changed, 27 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-broadcast.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 7bab6eb3b97..4bfbd3c59a9 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -28332,17 +28332,18 @@ (define_insn "*vec_dupv2di"
>  (define_insn "avx2_vbroadcasti128_<mode>"
>    [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
>         (vec_concat:VI_256
> -         (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
> +         (match_operand:<ssehalfvecmode> 1 "memory_operand" "jm,m,m")
>           (match_dup 1)))]
>    "TARGET_AVX2"
>    "@
>     vbroadcasti128\t{%1, %0|%0, %1}
>     vbroadcast<i128vldq>\t{%1, %0|%0, %1}
>     vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
> -  [(set_attr "isa" "*,avx512dq,avx512vl")
> +  [(set_attr "isa" "noavx512vl,avx512dq,avx512vl")
>     (set_attr "type" "ssemov")
>     (set_attr "prefix_extra" "1")
>     (set_attr "prefix" "vex,evex,evex")
> +   (set_attr "addr" "gpr16,*,*")
>     (set_attr "mode" "OI")])
>
>  ;; optimize vlddqu + vinserti128 to vbroadcasti128, the former will use
> diff --git a/gcc/testsuite/gcc.target/i386/apx-broadcast.c b/gcc/testsuite/gcc.target/i386/apx-broadcast.c
> new file mode 100644
> index 00000000000..7678dcd4550
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-broadcast.c
> @@ -0,0 +1,24 @@
> +/* { dg-do assemble { target { apxf && { ! ia32 } } } } */
> +/* { dg-options "-mavx512vl -mapxf -O2" } */
> +
> +#include <stdint.h>
> +#include <immintrin.h>
> +
> +void broadcast_avx2(int *sx, __m256i *coeff, __m128i *temp) {
> +
> +  __m256i semp[8];
> +
> +
> +  for (int i = 0; i < 8; i++)
> +    {
> +      asm volatile ("" : : : "r8", "r9", "r10", "r11", "r12", "r13",
> +                   "r14", "r15", "rax", "rcx", "rsi", "rdi", "rdx");
> +      register volatile uint64_t sm asm ("%r16") = i;
> +      semp[i] = _mm256_broadcastsi128_si256(temp[sm]);
> +    }
> +
> +  coeff[0] = _mm256_unpacklo_epi64(semp[0], semp[1]);
> +  coeff[1] = _mm256_unpackhi_epi64(semp[2], semp[3]);
> +  coeff[2] = _mm256_unpacklo_epi64(semp[4], semp[5]);
> +  coeff[3] = _mm256_unpackhi_epi64(semp[6], semp[7]);
> +}
> --
> 2.31.1
>

Reply via email to