On Thu, Jan 22, 2026 at 6:37 PM Richard Biener <[email protected]> wrote: > > On Tue, 20 Jan 2026, liuhongt wrote: > > > /* X86_TUNE_PREFER_BCST_FROM_INTEGER: Enable broadcast from integer for > > 128/256/512-bit vector, if disabled, the move will be done by > > broadcast/load from constant pool > > > > broadcast from integer: > > mov $0xa,%eax > > vmovd %eax,%xmm0 > > vpbroadcastd %xmm0,%xmm0 > > > > broadcast/load from constant pool: > > vpbroadcastd CST.0(%rip), %xmm0 */ > > > > The tune is on by default. > > Thanks! > > I wonder if the broadcast/load from constant pool could be late > split into the former when a GPR is available (and the tune > indicates it's profitable). Good point, maybe the implementation of this tune could also be changed to convert the load of const_vector into broadcast from memory before RA, and then decide after RA whether we need to use a GPR to avoid this load. I'm not sure if doing this optimization after RA might have potential issues; I suspect there could be some level of conflict between this and RA. > > > gcc/ChangeLog: > > > > PR target/123631 > > * config/i386/i386-expand.cc (ix86_vector_duplicate_value): > > Don't force CONST_INT to reg when !TARGET_PREFER_BCST_FROM_INTEGER, > > force it to mem instead. > > * config/i386/i386.h (TARGET_PREFER_BCST_FROM_INTEGER): New macro. > > * config/i386/x86-tune.def > > (X86_TUNE_PREFER_BCST_FROM_INTEGER): New tune. 
> > --- > > gcc/config/i386/i386-expand.cc | 17 +++++++++++++---- > > gcc/config/i386/i386.h | 3 +++ > > gcc/config/i386/x86-tune.def | 15 +++++++++++++++ > > 3 files changed, 31 insertions(+), 4 deletions(-) > > > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > > index d6525ddcdd0..a82bb4399c9 100644 > > --- a/gcc/config/i386/i386-expand.cc > > +++ b/gcc/config/i386/i386-expand.cc > > @@ -17361,12 +17361,21 @@ ix86_vector_duplicate_value (machine_mode mode, > > rtx target, rtx val) > > machine_mode innermode = GET_MODE_INNER (mode); > > rtx reg; > > > > - /* If that fails, force VAL into a register. */ > > + /* If that fails, force VAL into a register or mem. */ > > > > start_sequence (); > > - reg = force_reg (innermode, val); > > - if (GET_MODE (reg) != innermode) > > - reg = gen_lowpart (innermode, reg); > > + > > + if (!TARGET_PREFER_BCST_FROM_INTEGER && CONST_INT_P (val) > > + && GET_MODE_BITSIZE (innermode) <= HOST_BITS_PER_WIDE_INT > > + && GET_MODE_BITSIZE(mode) >= 128) > > + reg = validize_mem (force_const_mem (innermode, val)); > > + else > > + { > > + reg = force_reg (innermode, val); > > + if (GET_MODE (reg) != innermode) > > + reg = gen_lowpart (innermode, reg); > > + } > > + > > SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg); > > seq = end_sequence (); > > if (seq) > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > > index 71bacc22052..888edfed88f 100644 > > --- a/gcc/config/i386/i386.h > > +++ b/gcc/config/i386/i386.h > > @@ -409,6 +409,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; > > ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES_FROM_VEC] > > #define TARGET_INTER_UNIT_CONVERSIONS \ > > ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] > > +#define TARGET_PREFER_BCST_FROM_INTEGER \ > > + ix86_tune_features[X86_TUNE_PREFER_BCST_FROM_INTEGER] > > + > > #define TARGET_FOUR_JUMP_LIMIT > > ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] > > #define TARGET_SCHEDULE 
ix86_tune_features[X86_TUNE_SCHEDULE] > > #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] > > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def > > index a1944620daf..53cf1a19433 100644 > > --- a/gcc/config/i386/x86-tune.def > > +++ b/gcc/config/i386/x86-tune.def > > @@ -488,6 +488,21 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, > > "inter_unit_moves_from_vec", > > DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions", > > ~(m_AMDFAM10 | m_BDVER)) > > > > +/* X86_TUNE_PREFER_BCST_FROM_INTEGER: Enable broadcast from integer for > > + 128/256/512-bit vector, if disabled, the move will be done by > > + broadcast/load from constant pool > > + > > + broadcast from integer: > > + mov $0xa,%eax > > + vmovd %eax,%xmm0 > > + vpbroadcastd %xmm0,%xmm0 > > + > > + broadcast/load from constant pool: > > + vpbroadcastd CST.0(%rip), %xmm0 */ > > + > > +DEF_TUNE (X86_TUNE_PREFER_BCST_FROM_INTEGER, "prefer_bcst_from_integer", > > + m_ALL) > > + > > /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for > > fp converts to destination register. */ > > DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, > > "split_mem_opnd_for_fp_converts", > > > > -- > Richard Biener <[email protected]> > SUSE Software Solutions Germany GmbH, > Frankenstrasse 146, 90461 Nuernberg, Germany; > GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
-- BR, Hongtao
