Use anchors for FP constants - instead of using mergeable sections, which block
anchors, load FP constants from the constdata section. To avoid the anchor loads
being deoptimized later, ensure the cost of a CONST_DOUBLE is larger than the
cost of a MEM that loads it from constdata. Codesize is slightly smaller, and
performance on SPECFP2017 is ~0.30% better.
Passes regression testing, OK for commit?
gcc:
PR target/121240
* config/aarch64/aarch64.md (mov<mode>): Expand FP immediates early.
* config/aarch64/aarch64.cc (aarch64_select_rtx_section): Force
immediates <= 8 bytes to constdata.
(aarch64_rtx_costs): Increase cost of CONST_DOUBLE loaded from memory.
gcc/testsuite:
PR target/121240
* gcc.target/aarch64/dbl_mov_immediate_1.c: Adjust test.
* gcc.target/aarch64/pr63304_1.c: Likewise.
---
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 4784d3dadc5c8811e84d042bcb24cf2928520219..3a453ad4918d5e3d9f37e1937725e6cdd74f3af6 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -14315,6 +14315,10 @@ aarch64_select_rtx_section (machine_mode mode,
if (aarch64_can_use_per_function_literal_pools_p ())
return function_section (current_function_decl);
+ /* When using anchors for constants use the readonly section. */
+ if (known_le (GET_MODE_SIZE (mode), 8))
+ return readonly_data_section;
+
return default_elf_select_rtx_section (mode, x, align);
}
@@ -15269,11 +15273,13 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
*cost += extra_cost->fp[mode == DFmode || mode == DDmode].fpconst;
else if (!aarch64_float_const_zero_rtx_p (x))
{
- /* This will be a load from memory. */
+ /* Load from constdata - the cost of CONST_DOUBLE should be
+ higher than the cost of a MEM so that later optimizations
+ won't deoptimize an anchor load into a non-anchor load. */
if (mode == DFmode || mode == DDmode)
- *cost += extra_cost->ldst.loadd;
+ *cost += extra_cost->ldst.loadd + 1;
else
- *cost += extra_cost->ldst.loadf;
+ *cost += extra_cost->ldst.loadf + 1;
}
else
/* Otherwise this is +0.0. We get this using MOVI d0, #0
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 71458bf78f5cc4d926d7c5e0467daec9a5d75a03..4445208bf92ce0e08b72fde3de0f6dbc238cac3b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1960,6 +1960,18 @@ (define_expand "mov<mode>"
emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
DONE;
}
+
+ /* Expand into a literal load using anchors. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE
+ && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && !aarch64_float_const_zero_rtx_p (operands[1])
+ && !aarch64_float_const_rtx_p (operands[1]))
+ {
+ operands[1] = force_const_mem (<MODE>mode, operands[1]);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
}
)
diff --git a/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c b/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
index 8332035d80b91c497fc032cff1043922c328a701..fac32df8c24603d73840343e8069bf659f1e69fc 100644
--- a/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
@@ -41,8 +41,8 @@ double d4(void)
/* { dg-final { scan-assembler-times "movi\td\[0-9\]+, #?0" 1 } } */
-/* { dg-final { scan-assembler-times "adrp\tx\[0-9\]+, \.LC\[0-9\]" 2 } } */
-/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+, \\\[x\[0-9\], #:lo12:\.LC\[0-9\]\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "adrp\tx\[0-9\]+, " 2 } } */
+/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+, \\\[x\[0-9\], #:lo12:" 2 } } */
/* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, 1\\\.5e\\\+0" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr63304_1.c b/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
index 5d519d817ccdfe85849496dec654b1e8ac7a2888..134fd469b87c265a189507c82b49f8ad08248e14 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
@@ -45,4 +45,4 @@ cal3 (double a)
return 1;
}
-/* { dg-final { scan-assembler-times "adrp" 6 } } */
+/* { dg-final { scan-assembler-times "adrp" 4 } } */