On Tue, 20 Jan 2026, liuhongt wrote:

> /* X86_TUNE_PREFER_BCST_FROM_INTEGER: Enable broadcast from integer for
>    128/256/512-bit vector, if disabled, the move will be done by
>    broadcast/load from constant pool
> 
>    broadcast from integer:
>       mov    $0xa,%eax
>       vmovd  %eax,%xmm0
>       vpbroadcastd %xmm0,%xmm0
> 
>    broadcast/load from constant pool:
>       vpbroadcastd CST.0(%rip), %xmm0  */
> 
> The tune is on by default.

Thanks!

I wonder if the broadcast/load from the constant pool could be late
split into the former when a GPR is available (and the tune
indicates it's profitable).

> gcc/ChangeLog:
> 
>       PR target/123631
>       * config/i386/i386-expand.cc (ix86_vector_duplicate_value):
>       Don't force CONST_INT to reg when !TARGET_PREFER_BCST_FROM_INTEGER,
>       force it to mem instead.
>       * config/i386/i386.h (TARGET_PREFER_BCST_FROM_INTEGER): New macro.
>       * config/i386/x86-tune.def
>       (X86_TUNE_PREFER_BCST_FROM_INTEGER): New tune.
> ---
>  gcc/config/i386/i386-expand.cc | 17 +++++++++++++----
>  gcc/config/i386/i386.h         |  3 +++
>  gcc/config/i386/x86-tune.def   | 15 +++++++++++++++
>  3 files changed, 31 insertions(+), 4 deletions(-)
> 
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index d6525ddcdd0..a82bb4399c9 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -17361,12 +17361,21 @@ ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
>        machine_mode innermode = GET_MODE_INNER (mode);
>        rtx reg;
>  
> -      /* If that fails, force VAL into a register.  */
> +      /* If that fails, force VAL into a register or mem.  */
>  
>        start_sequence ();
> -      reg = force_reg (innermode, val);
> -      if (GET_MODE (reg) != innermode)
> -     reg = gen_lowpart (innermode, reg);
> +
> +      if (!TARGET_PREFER_BCST_FROM_INTEGER && CONST_INT_P (val)
> +       && GET_MODE_BITSIZE (innermode) <= HOST_BITS_PER_WIDE_INT
> +       && GET_MODE_BITSIZE(mode) >= 128)
> +     reg = validize_mem (force_const_mem (innermode, val));
> +      else
> +     {
> +       reg = force_reg (innermode, val);
> +       if (GET_MODE (reg) != innermode)
> +         reg = gen_lowpart (innermode, reg);
> +     }
> +
>        SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
>        seq = end_sequence ();
>        if (seq)
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 71bacc22052..888edfed88f 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -409,6 +409,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
>       ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES_FROM_VEC]
>  #define TARGET_INTER_UNIT_CONVERSIONS \
>       ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
> +#define TARGET_PREFER_BCST_FROM_INTEGER \
> +  ix86_tune_features[X86_TUNE_PREFER_BCST_FROM_INTEGER]
> +
>  #define TARGET_FOUR_JUMP_LIMIT       
> ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
>  #define TARGET_SCHEDULE              ix86_tune_features[X86_TUNE_SCHEDULE]
>  #define TARGET_USE_BT                ix86_tune_features[X86_TUNE_USE_BT]
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index a1944620daf..53cf1a19433 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -488,6 +488,21 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, 
> "inter_unit_moves_from_vec",
>  DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
>            ~(m_AMDFAM10 | m_BDVER))
>  
> +/* X86_TUNE_PREFER_BCST_FROM_INTEGER: Enable broadcast from integer for
> +   128/256/512-bit vector, if disabled, the move will be done by
> +   broadcast/load from constant pool
> +
> +   broadcast from integer:
> +      mov    $0xa,%eax
> +      vmovd  %eax,%xmm0
> +      vpbroadcastd %xmm0,%xmm0
> +
> +   broadcast/load from constant pool:
> +      vpbroadcastd CST.0(%rip), %xmm0  */
> +
> +DEF_TUNE (X86_TUNE_PREFER_BCST_FROM_INTEGER, "prefer_bcst_from_integer",
> +          m_ALL)
> +
>  /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
>     fp converts to destination register.  */
>  DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, 
> "split_mem_opnd_for_fp_converts",
> 

-- 
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to