On Tue, 22 Feb 2022, Jiufu Guo wrote:
> Hi,
>
> For constants, there are some codes to check: if it is able to put
> to instruction as an immediate operand or it is profitable to load from
> mem. There are still some places that could be improved for platforms.
>
> This patch could handle PR63281/57836. This patch does not change
> too much on the code like force_const_mem and legitimate_constant_p.
> We may integrate these APIs for passes like expand/cse/combine
> as a whole solution in the future (maybe better for stage1?).
>
> Bootstrap and regtest pass on ppc64le and x86_64. Is this ok for trunk?
> Thanks for comments!
I'm not sure whether we need a new hook here, but iff, then I think
whether loading a constant (from memory?) is faster or not depends
on the context. So what's the exact situation and the two variants
you are costing against each other? I assume (since you are
touching CSE) you are costing
(set (...) (mem)) (before CSE)
against
(set (...) (immediate)) (what CSE does now)
vs.
(set (...) (mem)) (original, no CSE)
? With the new hook you are skipping _all_ of the following loops
logic which does look like a quite bad design and hack (not that
I am very familiar with the candidate / costing logic in there).
We already have TARGET_INSN_COST which you could ask for a cost.
Like if we'd have a single_set then just temporarily substitute
the RHS with the candidate and cost the insns and compare against
the original insn cost. So why exactly do you need a new hook
for this particular situation?
Thanks,
Richard.
>
> BR,
> Jiufu
>
> gcc/ChangeLog:
>
> PR target/94393
> PR rtl-optimization/63281
> * config/rs6000/rs6000.cc (TARGET_FASTER_LOADING_CONSTANT):
> New hook.
> (rs6000_faster_loading_const): New hook.
> (rs6000_cannot_force_const_mem): Update.
> (rs6000_emit_move): Use rs6000_faster_loading_const.
> * cse.cc (cse_insn): Use new hook.
> * doc/tm.texi.in: New hook TARGET_FASTER_LOADING_CONSTANT.
> * doc/tm.texi: Regenerate.
> * target.def: New hook faster_loading_constant.
> * targhooks.cc (default_faster_loading_constant): New.
> * targhooks.h (default_faster_loading_constant): Declare it.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/medium_offset.c: Update.
> * gcc.target/powerpc/pr93012.c: Update.
> * gcc.target/powerpc/pr63281.c: New test.
>
> ---
> gcc/config/rs6000/rs6000.cc | 38 ++++++++++++++-----
> gcc/cse.cc | 5 +++
> gcc/doc/tm.texi | 5 +++
> gcc/doc/tm.texi.in | 2 +
> gcc/target.def | 8 ++++
> gcc/targhooks.cc | 6 +++
> .../gcc.target/powerpc/medium_offset.c | 2 +-
> gcc/testsuite/gcc.target/powerpc/pr63281.c | 14 +++++++
> gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +-
> 9 files changed, 71 insertions(+), 11 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr63281.c
>
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index d7a7cfe860f..beb21a0bb2b 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -1361,6 +1361,9 @@ static const struct attribute_spec
> rs6000_attribute_table[] =
> #undef TARGET_CANNOT_FORCE_CONST_MEM
> #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
>
> +#undef TARGET_FASTER_LOADING_CONSTANT
> +#define TARGET_FASTER_LOADING_CONSTANT rs6000_faster_loading_const
> +
> #undef TARGET_DELEGITIMIZE_ADDRESS
> #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
>
> @@ -9684,8 +9687,7 @@ rs6000_init_stack_protect_guard (void)
> static bool
> rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
> {
> - if (GET_CODE (x) == HIGH
> - && GET_CODE (XEXP (x, 0)) == UNSPEC)
> + if (GET_CODE (x) == HIGH)
> return true;
>
> /* A TLS symbol in the TOC cannot contain a sum. */
> @@ -10483,6 +10485,30 @@ rs6000_emit_move_si_sf_subreg (rtx dest, rtx source,
> machine_mode mode)
> return false;
> }
>
> +
> +/* Implement TARGET_FASTER_LOADING_CONSTANT. */
> +
> +static bool
> +rs6000_faster_loading_const (machine_mode mode, rtx dest, rtx src)
> +{
> + gcc_assert (CONSTANT_P (src));
> +
> + if (GET_MODE_CLASS (mode) == MODE_CC || mode == VOIDmode)
> + return false;
> + if (GET_CODE (src) == HIGH)
> + return false;
> + if (toc_relative_expr_p (src, false, NULL, NULL))
> + return false;
> + if (rs6000_cannot_force_const_mem (mode, src))
> + return false;
> +
> + if (REG_P (dest) && FP_REGNO_P (REGNO (dest)))
> + return true;
> + if (!CONST_INT_P (src))
> + return true;
> + return num_insns_constant (src, mode) > (TARGET_PCREL ? 1 : 2);
> +}
> +
> /* Emit a move from SOURCE to DEST in mode MODE. */
> void
> rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
> @@ -10860,13 +10886,7 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode
> mode)
> operands[1] = create_TOC_reference (operands[1], operands[0]);
> else if (mode == Pmode
> && CONSTANT_P (operands[1])
> - && GET_CODE (operands[1]) != HIGH
> - && ((REG_P (operands[0])
> - && FP_REGNO_P (REGNO (operands[0])))
> - || !CONST_INT_P (operands[1])
> - || (num_insns_constant (operands[1], mode)
> - > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
> - && !toc_relative_expr_p (operands[1], false, NULL, NULL)
> + && rs6000_faster_loading_const (mode, operands[0], operands[1])
> && (TARGET_CMODEL == CMODEL_SMALL
> || can_create_pseudo_p ()
> || (REG_P (operands[0])
> diff --git a/gcc/cse.cc b/gcc/cse.cc
> index a18b599d324..c33d3c7e911 100644
> --- a/gcc/cse.cc
> +++ b/gcc/cse.cc
> @@ -5158,6 +5158,11 @@ cse_insn (rtx_insn *insn)
> if (dest == pc_rtx && src_const && GET_CODE (src_const) == LABEL_REF)
> src_folded = src_const, src_folded_cost = src_folded_regcost = -1;
>
> + /* Check if src_foled is contant which is fastster to load pool. */
> + if (src_folded && CONSTANT_P (src_folded) && src && MEM_P (src)
> + && targetm.faster_loading_constant (mode, dest, src_folded))
> + src_folded = NULL_RTX, src_folded_cost = src_folded_regcost = -1;
> +
> /* Terminate loop when replacement made. This must terminate since
> the current contents will be tested and will always be valid. */
> while (1)
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 962bbb8caaf..65685311249 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -6014,6 +6014,11 @@ holding the constant. This restriction is often true
> of addresses
> of TLS symbols for various targets.
> @end deftypefn
>
> +@deftypefn {Target Hook} bool TARGET_FASTER_LOADING_CONSTANT (machine_mode
> @var{mode}, rtx @var{y}, rtx @var{x})
> +It returns true if loading contant @var{x} is faster than building
> +from scratch, and set to @var{y}. @var{mode} is the mode of @var{x}.
> +@end deftypefn
> +
> @deftypefn {Target Hook} bool TARGET_USE_BLOCKS_FOR_CONSTANT_P (machine_mode
> @var{mode}, const_rtx @var{x})
> This hook should return true if pool entries for constant @var{x} can
> be placed in an @code{object_block} structure. @var{mode} is the mode
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 394b59e70a6..918914f0e30 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -4148,6 +4148,8 @@ address; but often a machine-dependent strategy can
> generate better code.
>
> @hook TARGET_CANNOT_FORCE_CONST_MEM
>
> +@hook TARGET_FASTER_LOADING_CONSTANT
> +
> @hook TARGET_USE_BLOCKS_FOR_CONSTANT_P
>
> @hook TARGET_USE_BLOCKS_FOR_DECL_P
> diff --git a/gcc/target.def b/gcc/target.def
> index 57e64b20eef..8b007aca9dc 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -2987,6 +2987,14 @@ locally. The default is to return false.",
> bool, (void),
> hook_bool_void_false)
>
> +/* Check if it is profitable to load the constant from constant pool. */
> +DEFHOOK
> +(faster_loading_constant,
> + "It returns true if loading contant @var{x} is faster than building\n\
> +from scratch, and set to @var{y}. @var{mode} is the mode of @var{x}.",
> + bool, (machine_mode mode, rtx y, rtx x),
> + default_faster_loading_constant)
> +
> /* True if it is OK to do sibling call optimization for the specified
> call expression EXP. DECL will be the called function, or NULL if
> this is an indirect call. */
> diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
> index fc49235eb38..1d9340d1c14 100644
> --- a/gcc/targhooks.cc
> +++ b/gcc/targhooks.cc
> @@ -173,6 +173,12 @@ default_return_in_memory (const_tree type,
> return (TYPE_MODE (type) == BLKmode);
> }
>
> +bool
> +default_faster_loading_constant (machine_mode mode, rtx dest, rtx src)
> +{
> + return false;
> +}
> +
> rtx
> default_legitimize_address (rtx x, rtx orig_x ATTRIBUTE_UNUSED,
> machine_mode mode ATTRIBUTE_UNUSED)
> diff --git a/gcc/testsuite/gcc.target/powerpc/medium_offset.c
> b/gcc/testsuite/gcc.target/powerpc/medium_offset.c
> index f29eba08c38..4889e8fa8ec 100644
> --- a/gcc/testsuite/gcc.target/powerpc/medium_offset.c
> +++ b/gcc/testsuite/gcc.target/powerpc/medium_offset.c
> @@ -1,7 +1,7 @@
> /* { dg-do compile { target { powerpc*-*-* } } } */
> /* { dg-require-effective-target lp64 } */
> /* { dg-options "-O" } */
> -/* { dg-final { scan-assembler-not "\\+4611686018427387904" } } */
> +/* { dg-final { scan-assembler-times {\msldi|pld\M} 1 } } */
>
> static int x;
>
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr63281.c
> b/gcc/testsuite/gcc.target/powerpc/pr63281.c
> new file mode 100644
> index 00000000000..58ba074d427
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr63281.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#define CONST1 0x100803004101001
> +#define CONST2 0x000406002105007
> +
> +void __attribute__ ((noinline))
> +foo (unsigned long long *a, unsigned long long *b)
> +{
> + *a = CONST1;
> + *b = CONST2;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mld|pld\M} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c
> b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> index 4f764d0576f..5afb4f79c45 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
> unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
> unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>
> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
> +/* { dg-final { scan-assembler-times {\mrldimi|ld|pld\M} 7 } } */
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)