On 12/2/20 2:34 AM, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  There are slight
> improvements in all SPEC benchmarks, no regressions that could not be
> "fixed" by adding nops.  Ok for master?
> 
> 
> 
> Currently GCC loads large immediates into GPRs from the literal pool,
> which is not as efficient as loading two halves with llihf and oilf.
> 
> gcc/ChangeLog:
> 
> 2020-11-30  Ilya Leoshkevich  <i...@linux.ibm.com>
> 
>       * config/s390/s390-protos.h (s390_const_int_pool_entry_p): New
>       function.
>       * config/s390/s390.c (s390_const_int_pool_entry_p): New
>       function.
>       * config/s390/s390.md: Add define_peephole2 that produces llihf
>       and oilf.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-11-30  Ilya Leoshkevich  <i...@linux.ibm.com>
> 
>       * gcc.target/s390/load-imm64-1.c: New test.
>       * gcc.target/s390/load-imm64-2.c: New test.
> ---
>  gcc/config/s390/s390-protos.h                |  1 +
>  gcc/config/s390/s390.c                       | 31 ++++++++++++++++++++
>  gcc/config/s390/s390.md                      | 22 ++++++++++++++
>  gcc/testsuite/gcc.target/s390/load-imm64-1.c | 10 +++++++
>  gcc/testsuite/gcc.target/s390/load-imm64-2.c | 10 +++++++
>  5 files changed, 74 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/s390/load-imm64-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/load-imm64-2.c
> 
> diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
> index ad2f7f77c18..eb10c3f4bbb 100644
> --- a/gcc/config/s390/s390-protos.h
> +++ b/gcc/config/s390/s390-protos.h
> @@ -135,6 +135,7 @@ extern void s390_split_access_reg (rtx, rtx *, rtx *);
>  extern void print_operand_address (FILE *, rtx);
>  extern void print_operand (FILE *, rtx, int);
>  extern void s390_output_pool_entry (rtx, machine_mode, unsigned int);
> +extern bool s390_const_int_pool_entry_p (rtx, HOST_WIDE_INT *);
>  extern int s390_label_align (rtx_insn *);
>  extern int s390_agen_dep_p (rtx_insn *, rtx_insn *);
>  extern rtx_insn *s390_load_got (void);
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 02f18366aa1..e3d68d3543b 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -9400,6 +9400,37 @@ s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
>      }
>  }
>  
> +/* Return true if MEM refers to an integer constant in the literal pool.  If
> +   VAL is not nullptr, then also fill it with the constant's value.  */
> +
> +bool
> +s390_const_int_pool_entry_p (rtx mem, HOST_WIDE_INT *val)
> +{
> +  /* Try to match the following:
> +     - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)).
> +     - (mem (symbol_ref)).  */
> +
> +  if (!MEM_P (mem))
> +    return false;
> +
> +  rtx addr = XEXP (mem, 0);
> +  rtx sym;
> +  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF)
> +    sym = XVECEXP (addr, 0, 0);
> +  else
> +    sym = addr;
> +
> +  if (GET_CODE (sym) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (sym))
Please use !SYMBOL_REF_P (sym) here instead of GET_CODE (sym) != SYMBOL_REF.

> +    return false;
> +
> +  rtx val_rtx = get_pool_constant (sym);
> +  if (!CONST_INT_P (val_rtx))
> +    return false;
> +
> +  if (val != nullptr)
> +    *val = INTVAL (val_rtx);
> +  return true;
> +}
Alternatively, you could probably have returned the RTX instead and used
gen_highpart / gen_lowpart in the peephole.  But there is no need to change
that.

>  
>  /* Return an RTL expression representing the value of the return address
>     for the frame COUNT steps up from the current frame.  FRAME is the
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 910415a5974..79e9a75ba2f 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -2116,6 +2116,28 @@ (define_peephole2
>    [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]
>    "")
>  
> +; Split loading of 64-bit constants into GPRs into llihf + oilf -
> +; counterintuitively, using oilf is faster than iilf.  oilf clobbers
> +; cc, so cc must be dead.
> +(define_peephole2
> +  [(set (match_operand:DI 0 "register_operand" "")
> +        (match_operand:DI 1 "memory_operand" ""))]
> +  "TARGET_64BIT
> +   && TARGET_EXTIMM
> +   && GENERAL_REG_P (operands[0])
> +   && s390_const_int_pool_entry_p (operands[1], nullptr)
> +   && peep2_reg_dead_p (1, gen_rtx_REG (CCmode, CC_REGNUM))"
> +  [(set (match_dup 0) (match_dup 2))
> +   (parallel
> +    [(set (match_dup 0) (ior:DI (match_dup 0) (match_dup 3)))
> +     (clobber (reg:CC CC_REGNUM))])]
> +{
> +  HOST_WIDE_INT val;
> +  gcc_assert (s390_const_int_pool_entry_p (operands[1], &val));

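This probably breaks with checking disabled: the call that fills in val sits
inside gcc_assert, whose argument may not be evaluated in that configuration,
which would leave val uninitialized.  It would be safer to do the call outside
of the assert and only assert on its result.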

> +  operands[2] = gen_rtx_CONST_INT (DImode, val & 0xFFFFFFFF00000000);
> +  operands[3] = gen_rtx_CONST_INT (DImode, val & 0x00000000FFFFFFFF);

Should these constants carry a ULL suffix?

> +})
> +
>  ;
>  ; movsi instruction pattern(s).
>  ;
> diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-1.c b/gcc/testsuite/gcc.target/s390/load-imm64-1.c
> new file mode 100644
> index 00000000000..db0a89395aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/load-imm64-1.c
> @@ -0,0 +1,10 @@
> +/* Test that large 64-bit constants are loaded with llihf + oilf when lgrl is
> +   not available.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=z9-109" } */
> +
> +unsigned long magic (void) { return 0x3f08c5392f756cd; }
> +
> +/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 } } } */
> diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-2.c b/gcc/testsuite/gcc.target/s390/load-imm64-2.c
> new file mode 100644
> index 00000000000..43c00cdca3a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/load-imm64-2.c
> @@ -0,0 +1,10 @@
> +/* Test that large 64-bit constants are loaded with llihf + oilf when lgrl is
> +   available.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=z10" } */
> +
> +unsigned long magic (void) { return 0x3f08c5392f756cd; }
> +
> +/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 } } } */
> 

Reply via email to