On Tue, Jan 13, 2026 at 1:34 PM Wilco Dijkstra <[email protected]> wrote:
>
>
> Use anchors for FP constants - instead of using mergeable sections, which
> block anchors, load FP constants from the constdata section. To avoid the
> anchor loads being deoptimized later, ensure the cost of a CONST_DOUBLE is
> larger than the cost of a MEM that loads it from constdata. Code size is
> slightly smaller, and performance on SPECFP2017 is ~0.30% better.
>
> Passes regression testing, OK for commit?
Ok.
>
> gcc:
> PR target/121240
> * config/aarch64/aarch64.md (mov<mode>): Expand FP immediates early.
> * config/aarch64/aarch64.cc (aarch64_select_rtx_section): Force
> immediates <= 8 bytes to constdata.
> (aarch64_rtx_costs): Increase cost of CONST_DOUBLE loaded from memory.
>
> gcc/testsuite:
> PR target/121240
> * gcc.target/aarch64/dbl_mov_immediate_1.c: Adjust test.
> * gcc.target/aarch64/pr63304_1.c: Likewise.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index
> 4784d3dadc5c8811e84d042bcb24cf2928520219..3a453ad4918d5e3d9f37e1937725e6cdd74f3af6
> 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -14315,6 +14315,10 @@ aarch64_select_rtx_section (machine_mode mode,
> if (aarch64_can_use_per_function_literal_pools_p ())
> return function_section (current_function_decl);
>
> + /* When using anchors for constants use the readonly section. */
> + if (known_le (GET_MODE_SIZE (mode), 8))
> + return readonly_data_section;
> +
> return default_elf_select_rtx_section (mode, x, align);
> }
>
> @@ -15269,11 +15273,13 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int
> outer ATTRIBUTE_UNUSED,
> *cost += extra_cost->fp[mode == DFmode || mode == DDmode].fpconst;
> else if (!aarch64_float_const_zero_rtx_p (x))
> {
> - /* This will be a load from memory. */
> + /* Load from constdata - the cost of CONST_DOUBLE should be
> + higher than the cost of a MEM so that later optimizations
> + won't deoptimize an anchor load into a non-anchor load. */
> if (mode == DFmode || mode == DDmode)
> - *cost += extra_cost->ldst.loadd;
> + *cost += extra_cost->ldst.loadd + 1;
> else
> - *cost += extra_cost->ldst.loadf;
> + *cost += extra_cost->ldst.loadf + 1;
> }
> else
> /* Otherwise this is +0.0. We get this using MOVI d0, #0
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index
> 71458bf78f5cc4d926d7c5e0467daec9a5d75a03..4445208bf92ce0e08b72fde3de0f6dbc238cac3b
> 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1960,6 +1960,18 @@ (define_expand "mov<mode>"
> emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
> DONE;
> }
> +
> + /* Expand into a literal load using anchors. */
> + if (GET_CODE (operands[1]) == CONST_DOUBLE
> + && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
> + && !aarch64_float_const_representable_p (operands[1])
> + && !aarch64_float_const_zero_rtx_p (operands[1])
> + && !aarch64_float_const_rtx_p (operands[1]))
> + {
> + operands[1] = force_const_mem (<MODE>mode, operands[1]);
> + emit_move_insn (operands[0], operands[1]);
> + DONE;
> + }
> }
> )
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
> b/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
> index
> 8332035d80b91c497fc032cff1043922c328a701..fac32df8c24603d73840343e8069bf659f1e69fc
> 100644
> --- a/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
> @@ -41,8 +41,8 @@ double d4(void)
>
> /* { dg-final { scan-assembler-times "movi\td\[0-9\]+, #?0"
> 1 } } */
>
> -/* { dg-final { scan-assembler-times "adrp\tx\[0-9\]+, \.LC\[0-9\]"
> 2 } } */
> -/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+, \\\[x\[0-9\],
> #:lo12:\.LC\[0-9\]\\\]" 2 } } */
> +/* { dg-final { scan-assembler-times "adrp\tx\[0-9\]+, " 2 } } */
> +/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+, \\\[x\[0-9\], #:lo12:"
> 2 } } */
>
> /* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, 1\\\.5e\\\+0"
> 1 } } */
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
> b/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
> index
> 5d519d817ccdfe85849496dec654b1e8ac7a2888..134fd469b87c265a189507c82b49f8ad08248e14
> 100644
> --- a/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
> @@ -45,4 +45,4 @@ cal3 (double a)
> return 1;
> }
>
> -/* { dg-final { scan-assembler-times "adrp" 6 } } */
> +/* { dg-final { scan-assembler-times "adrp" 4 } } */
>