On 12/2/20 2:34 AM, Ilya Leoshkevich wrote: > Bootstrapped and regtested on s390x-redhat-linux. There are slight > improvements in all SPEC benchmarks, no regressions that could not be > "fixed" by adding nops. Ok for master? > > > > Currently GCC loads large immediates into GPRs from the literal pool, > which is not as efficient as loading two halves with llihf and oilf. > > gcc/ChangeLog: > > 2020-11-30 Ilya Leoshkevich <i...@linux.ibm.com> > > * config/s390/s390-protos.h (s390_const_int_pool_entry_p): New > function. > * config/s390/s390.c (s390_const_int_pool_entry_p): New > function. > * config/s390/s390.md: Add define_peephole2 that produces llihf > and oilf. > > gcc/testsuite/ChangeLog: > > 2020-11-30 Ilya Leoshkevich <i...@linux.ibm.com> > > * gcc.target/s390/load-imm64-1.c: New test. > * gcc.target/s390/load-imm64-2.c: New test. > --- > gcc/config/s390/s390-protos.h | 1 + > gcc/config/s390/s390.c | 31 ++++++++++++++++++++ > gcc/config/s390/s390.md | 22 ++++++++++++++ > gcc/testsuite/gcc.target/s390/load-imm64-1.c | 10 +++++++ > gcc/testsuite/gcc.target/s390/load-imm64-2.c | 10 +++++++ > 5 files changed, 74 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/s390/load-imm64-1.c > create mode 100644 gcc/testsuite/gcc.target/s390/load-imm64-2.c > > diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h > index ad2f7f77c18..eb10c3f4bbb 100644 > --- a/gcc/config/s390/s390-protos.h > +++ b/gcc/config/s390/s390-protos.h > @@ -135,6 +135,7 @@ extern void s390_split_access_reg (rtx, rtx *, rtx *); > extern void print_operand_address (FILE *, rtx); > extern void print_operand (FILE *, rtx, int); > extern void s390_output_pool_entry (rtx, machine_mode, unsigned int); > +extern bool s390_const_int_pool_entry_p (rtx, HOST_WIDE_INT *); > extern int s390_label_align (rtx_insn *); > extern int s390_agen_dep_p (rtx_insn *, rtx_insn *); > extern rtx_insn *s390_load_got (void); > diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c > index 
02f18366aa1..e3d68d3543b 100644 > --- a/gcc/config/s390/s390.c > +++ b/gcc/config/s390/s390.c > @@ -9400,6 +9400,37 @@ s390_output_pool_entry (rtx exp, machine_mode mode, > unsigned int align) > } > } > > +/* Return true if MEM refers to an integer constant in the literal pool. If > + VAL is not nullptr, then also fill it with the constant's value. */ > + > +bool > +s390_const_int_pool_entry_p (rtx mem, HOST_WIDE_INT *val) > +{ > + /* Try to match the following: > + - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)). > + - (mem (symbol_ref)). */ > + > + if (!MEM_P (mem)) > + return false; > + > + rtx addr = XEXP (mem, 0); > + rtx sym; > + if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF) > + sym = XVECEXP (addr, 0, 0); > + else > + sym = addr; > + > + if (GET_CODE (sym) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (sym)) Use !SYMBOL_REF_P (sym) here instead of the GET_CODE comparison.
> + return false; > + > + rtx val_rtx = get_pool_constant (sym); > + if (!CONST_INT_P (val_rtx)) > + return false; > + > + if (val != nullptr) > + *val = INTVAL (val_rtx); > + return true; > +} Alternatively you probably could have returned the RTX instead and used gen_highpart / gen_lowpart in the peephole. But no need to change that. > > /* Return an RTL expression representing the value of the return address > for the frame COUNT steps up from the current frame. FRAME is the > diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md > index 910415a5974..79e9a75ba2f 100644 > --- a/gcc/config/s390/s390.md > +++ b/gcc/config/s390/s390.md > @@ -2116,6 +2116,28 @@ (define_peephole2 > [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))] > "") > > +; Split loading of 64-bit constants into GPRs into llihf + oilf - > +; counterintuitively, using oilf is faster than iilf. oilf clobbers > +; cc, so cc must be dead. > +(define_peephole2 > + [(set (match_operand:DI 0 "register_operand" "") > + (match_operand:DI 1 "memory_operand" ""))] > + "TARGET_64BIT > + && TARGET_EXTIMM > + && GENERAL_REG_P (operands[0]) > + && s390_const_int_pool_entry_p (operands[1], nullptr) > + && peep2_reg_dead_p (1, gen_rtx_REG (CCmode, CC_REGNUM))" > + [(set (match_dup 0) (match_dup 2)) > + (parallel > + [(set (match_dup 0) (ior:DI (match_dup 0) (match_dup 3))) > + (clobber (reg:CC CC_REGNUM))])] > +{ > + HOST_WIDE_INT val; > + gcc_assert (s390_const_int_pool_entry_p (operands[1], &val)); This probably breaks with checking disabled. > + operands[2] = gen_rtx_CONST_INT (DImode, val & 0xFFFFFFFF00000000); > + operands[3] = gen_rtx_CONST_INT (DImode, val & 0x00000000FFFFFFFF); Should these constants have a ULL suffix? > +}) > + > ; > ; movsi instruction pattern(s). 
> ; > diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-1.c > b/gcc/testsuite/gcc.target/s390/load-imm64-1.c > new file mode 100644 > index 00000000000..db0a89395aa > --- /dev/null > +++ b/gcc/testsuite/gcc.target/s390/load-imm64-1.c > @@ -0,0 +1,10 @@ > +/* Test that large 64-bit constants are loaded with llihf + oilf when lgrl is > + not available. */ > + > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=z9-109" } */ > + > +unsigned long magic (void) { return 0x3f08c5392f756cd; } > + > +/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 } } } */ > +/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 } } } */ > diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-2.c > b/gcc/testsuite/gcc.target/s390/load-imm64-2.c > new file mode 100644 > index 00000000000..43c00cdca3a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/s390/load-imm64-2.c > @@ -0,0 +1,10 @@ > +/* Test that large 64-bit constants are loaded with llihf + oilf when lgrl is > + available. */ > + > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=z10" } */ > + > +unsigned long magic (void) { return 0x3f08c5392f756cd; } > + > +/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 } } } */ > +/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 } } } */ >