Hi Jeff,

on 2024/6/13 10:19, Jiufu Guo wrote:
> Hi,
> 
> Sometimes, a complicated constant is built via 3(or more)
> instructions.  Generally speaking, it would not be as fast
> as loading it from the constant pool (as the discussions in
> PR63281):
> "ld" is one instruction.  If consider "address/toc" adjust,
> we may count it as 2 instructions. And "pld" may need fewer
> cycles.
> 
> As testing(SPEC2017), it could get better/stable runtime
> if set the threshold as "> 2" (compare with "> 3").
> 
> As known, because the constant is load from memory by this
> patch,  so this functionality may affect the cache missing.

I wonder if it's a good idea to offer one rs6000 specific
parameter to control this threshold, since this change isn't
always a win, like 5 constant building simple insns can be
well scheduled among insns in its own bb and an equilavent
load can suffer from cache miss or insufficient LSU resource
A parameter may be better for users in case they want to
fine tune further for some cases.

> While, IMHO, this patch would be still do the right thing.
> 
> Compare with the previous version:
> This version 1. allow assigning complicate constant to r0 before RA,
> 2. allow more condition beside TARGET_ELF,
> 3. updated test cases, and remove 2 test cases as the orignal test
> point is not used any more.

Can they be written with the proposed FORCE_CONST_INTO_REG used in
other test cases?

> 
> Boostrap & regtest pass on ppc64{,le}.
> Is this ok for trunk?
> 
> BR,
> Jeff (Jiufu Guo)
> 
>       PR target/63281
> 
> gcc/ChangeLog:
> 
>       * config/rs6000/rs6000.cc (rs6000_emit_set_const): Split constant to
>       memory under -m64.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/powerpc/const_anchors.c: Test final-rtl.
>       * gcc.target/powerpc/pr106550_1.c (FORCE_CONST_INTO_REG): New macro.
>       * gcc.target/powerpc/pr106550_1.c: Use macro FORCE_CONST_INTO_REG.

Curious if git gcc-verify will complain this (same changed file have multipe 
lines).

>       * gcc.target/powerpc/pr87870.c: Update asm insn checking.
>       * gcc.target/powerpc/pr93012.c: Likewise.
>       * gcc.target/powerpc/parall_5insn_const.c: Removed.
>       * gcc.target/powerpc/pr106550.c: Removed.

Nit: s/Removed/Remove/

>       * gcc.target/powerpc/pr63281.c: New test.
> ---
>  gcc/config/rs6000/rs6000.cc                   | 15 +++++++++++
>  .../gcc.target/powerpc/const_anchors.c        |  5 ++--
>  .../gcc.target/powerpc/parall_5insn_const.c   | 27 -------------------
>  gcc/testsuite/gcc.target/powerpc/pr106550.c   | 14 ----------
>  gcc/testsuite/gcc.target/powerpc/pr106550_1.c | 16 ++++++-----
>  gcc/testsuite/gcc.target/powerpc/pr63281.c    | 11 ++++++++
>  gcc/testsuite/gcc.target/powerpc/pr87870.c    |  5 +++-
>  gcc/testsuite/gcc.target/powerpc/pr93012.c    |  6 ++++-
>  8 files changed, 47 insertions(+), 52 deletions(-)
>  delete mode 100644 gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
>  delete mode 100644 gcc/testsuite/gcc.target/powerpc/pr106550.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr63281.c
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index e4dc629ddcc..bc9d6f5c34f 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10240,6 +10240,21 @@ rs6000_emit_set_const (rtx dest, rtx source)
>         c = sext_hwi (c, 32);
>         emit_move_insn (lo, GEN_INT (c));
>       }
> +

Nit: Unexpected new line.

> +      else if ((can_create_pseudo_p () || base_reg_operand (dest, mode))

Nit: It's not obvious, maybe one comment on why we need base_reg_operand
restriction under !can_create_pseudo_p.

BR,
Kewen

> +            && TARGET_64BIT && num_insns_constant (source, mode) > 2)
> +     {
> +       rtx sym = force_const_mem (mode, source);
> +       if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0))
> +           && use_toc_relative_ref (XEXP (sym, 0), mode))
> +         {
> +           rtx toc = create_TOC_reference (XEXP (sym, 0), dest);
> +           sym = gen_const_mem (mode, toc);
> +           set_mem_alias_set (sym, get_TOC_alias_set ());
> +         }
> +
> +       emit_move_insn (dest, sym);
> +     }
>        else
>       rs6000_emit_set_long_const (dest, c);
>        break;
> diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c 
> b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
> index 542e2674b12..682e773d506 100644
> --- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c
> +++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target has_arch_ppc64 } } */
> -/* { dg-options "-O2" } */
> +/* { dg-options "-O2 -fdump-rtl-final" } */
>  
>  #define C1 0x2351847027482577ULL
>  #define C2 0x2351847027482578ULL
> @@ -17,4 +17,5 @@ void __attribute__ ((noinline)) foo1 (long long *a, long 
> long b)
>      *a++ = C2;
>  }
>  
> -/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */
> +/* { dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c 
> b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
> deleted file mode 100644
> index e3a9a7264cf..00000000000
> --- a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
> +++ /dev/null
> @@ -1,27 +0,0 @@
> -/* { dg-do run } */
> -/* { dg-options "-O2 -mno-prefixed -save-temps" } */
> -/* { dg-require-effective-target has_arch_ppc64 } */
> -
> -/* { dg-final { scan-assembler-times {\mlis\M} 4 } } */
> -/* { dg-final { scan-assembler-times {\mori\M} 4 } } */
> -/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */
> -
> -void __attribute__ ((noinline)) foo (unsigned long long *a)
> -{
> -  /* 2 lis + 2 ori + 1 rldimi for each constant.  */
> -  *a++ = 0x800aabcdc167fa16ULL;
> -  *a++ = 0x7543a876867f616ULL;
> -}
> -
> -long long A[] = {0x800aabcdc167fa16ULL, 0x7543a876867f616ULL};
> -int
> -main ()
> -{
> -  long long res[2];
> -
> -  foo (res);
> -  if (__builtin_memcmp (res, A, sizeof (res)) != 0)
> -    __builtin_abort ();
> -
> -  return 0;
> -}
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c 
> b/gcc/testsuite/gcc.target/powerpc/pr106550.c
> deleted file mode 100644
> index 92b76ac8811..00000000000
> --- a/gcc/testsuite/gcc.target/powerpc/pr106550.c
> +++ /dev/null
> @@ -1,14 +0,0 @@
> -/* PR target/106550 */
> -/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> -/* { dg-require-effective-target has_arch_ppc64 } */
> -
> -void
> -foo (unsigned long long *a)
> -{
> -  *a++ = 0x020805006106003; /* pli+pli+rldimi */
> -  *a++ = 0x2351847027482577;/* pli+pli+rldimi */  
> -}
> -
> -/* { dg-final { scan-assembler-times {\mpli\M} 4 } } */
> -/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */
> -
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c 
> b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
> index 5ab40d71a56..aa98f31865e 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
> @@ -4,17 +4,19 @@
>  /* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1" } */
>  /* force the constant splitter run after RA: -fdisable-rtl-split1.  */
>  
> +#define FORCE_CONST_INTO_REG(DEST, CST)                                      
>   \
> +  {                                                                          
>   \
> +    register long long d asm ("r0") = CST;                                   
>   \
> +    asm volatile ("std %1, %0" : : "m"(DEST), "r"(d));                       
>   \
> +  }
> +
>  void
>  foo (unsigned long long *a)
>  {
>    /* Test oris/ori is used where paddi does not work with 'r0'. */
> -  register long long d asm("r0") = 0x1245abcef9240dec; /* pli+sldi+oris+ori 
> */
> -  long long n;
> -  asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
> -  *a++ = n;
> -
> -  *a++ = 0x235a8470a7480000ULL; /* pli+sldi+oris */
> -  *a++ = 0x23a184700000b677ULL; /* pli+sldi+ori */
> +  FORCE_CONST_INTO_REG (*a++, 0x1245abcef9240dec); /* pli+sldi+oris+ori */
> +  FORCE_CONST_INTO_REG (*a++, 0x235a8470a7480000ULL); /* pli+sldi+oris */
> +  FORCE_CONST_INTO_REG (*a++, 0x23a184700000b677ULL); /* pli+sldi+ori */
>  }
>  
>  /* { dg-final { scan-assembler-times {\mpli\M} 3 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr63281.c 
> b/gcc/testsuite/gcc.target/powerpc/pr63281.c
> new file mode 100644
> index 00000000000..9763a7181fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr63281.c
> @@ -0,0 +1,11 @@
> +/* Check loading constant from memory pool.  */
> +/* { dg-options "-O2 -mpowerpc64" } */
> +
> +void
> +foo (unsigned long long *a)
> +{
> +  *a++ = 0x2351847027482577ULL;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mp?ld\M} 1 { target { lp64 } } } } */
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr87870.c 
> b/gcc/testsuite/gcc.target/powerpc/pr87870.c
> index d2108ac3386..09b2e8de901 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr87870.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr87870.c
> @@ -25,4 +25,7 @@ test3 (void)
>    return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad;
>  }
>  
> -/* { dg-final { scan-assembler-not {\mld\M} } } */
> +/* test3 is using "ld" to load the value to r3 and r4. So there are 2 'ld's
> +   test0, test1 and test2 are using "li", then check 6 'li's.  */
> +/* { dg-final { scan-assembler-times {\mp?ld\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mli\M} 6 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c 
> b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> index 4f764d0576f..660fb0dddfa 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> @@ -10,4 +10,8 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
>  unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
>  unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>  
> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
> +/* { dg-final { scan-assembler-times {\mrldimi\M} 7 { target has_arch_pwr10 
> } } } */
> +
> +/* 4 complicated constants can be loaded from pool.  */
> +/* { dg-final { scan-assembler-times {\mrldimi\M} 3 { target { ! 
> has_arch_pwr10 } } } } */
> +/* { dg-final { scan-assembler-times {\mld\M} 4 { target { ! has_arch_pwr10 
> } } } } */

Reply via email to