On Thu, 2020-03-26 at 05:06 -0500, luoxhu--- via Gcc-patches wrote:
> From: Xionghu Luo <luo...@linux.ibm.com>
> 
> Remove split code from add<mode>3 to allow a later pass to split.
> This allows later logic to hoist out constant load in add
> instructions.
> In a loop, lis+ori can be hoisted out, which improves performance
> compared with the previous addis+addi (about 15% in a typical case).
> The drawback is that one more register is used and one more
> instruction is generated, i.e.:
> 
> addis 3,3,0x8765
> addi 3,3,0x4321
> 
> =>
> 
> lis 9,0x8765
> ori 9,9,0x4321
> add 3,3,9

LGTM.  :-)
I defer to Segher for his review & approval, etc.

Thanks,
-Will

> 
> gcc/ChangeLog:
> 
> 2020-03-26  Xiong Hu Luo  <luo...@linux.ibm.com>
> 
>       * config/rs6000/rs6000.md (add<mode>3): Remove split code, move constant
>         to temp register before add.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-03-26  Xiong Hu Luo  <luo...@linux.ibm.com>
> 
>       * gcc.target/powerpc/add-const.c: New.
> ---
>  gcc/config/rs6000/rs6000.md                  | 25 ++------------------
>  gcc/testsuite/gcc.target/powerpc/add-const.c | 18 ++++++++++++++
>  2 files changed, 20 insertions(+), 23 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/add-const.c
> 
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index ad88b6783af..72f3f604e0d 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -1733,29 +1733,8 @@ (define_expand "add<mode>3"
>                 || rtx_equal_p (operands[0], operands[1]))
>                ? operands[0] : gen_reg_rtx (<MODE>mode));
> 
> -      /* Adding a constant to r0 is not a valid insn, so use a different
> -      strategy in that case.  */
> -      if (reg_or_subregno (operands[1]) == 0 || reg_or_subregno (tmp) == 0)
> -     {
> -       if (operands[0] == operands[1])
> -         FAIL;
> -       rs6000_emit_move (operands[0], operands[2], <MODE>mode);
> -       emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[0]));
> -       DONE;
> -     }
> -
> -      HOST_WIDE_INT val = INTVAL (operands[2]);
> -      HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
> -      HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode);
> -
> -      if (<MODE>mode == DImode && !satisfies_constraint_L (GEN_INT (rest)))
> -     FAIL;
> -
> -      /* The ordering here is important for the prolog expander.
> -      When space is allocated from the stack, adding 'low' first may
> -      produce a temporary deallocation (which would be bad).  */
> -      emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (rest)));
> -      emit_insn (gen_add<mode>3 (operands[0], tmp, GEN_INT (low)));
> +      rs6000_emit_move (tmp, operands[2], <MODE>mode);
> +      emit_insn (gen_add<mode>3 (operands[0], operands[1], tmp));
>        DONE;
>      }
>  })
> diff --git a/gcc/testsuite/gcc.target/powerpc/add-const.c b/gcc/testsuite/gcc.target/powerpc/add-const.c
> new file mode 100644
> index 00000000000..e1007247b32
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/add-const.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O3 -fno-unroll-loops" } */
> +
> +/* Ensure the lis,ori are generated, which indicates they have
> +   been hoisted outside of the loop.  */
> +
> +typedef unsigned long ulong;
> +ulong
> +foo (ulong n, ulong h)
> +{
> +  int i;
> +  for (i = 0; i < n; i++)
> +    h = ((h + 8) | h) + 0x87654321;
> +  return h;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mlis\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mori\M} 1 } } */

Reply via email to