https://gcc.gnu.org/g:1438f92c9e721133031b72a3884ed7bc17d0401c

commit r16-6776-g1438f92c9e721133031b72a3884ed7bc17d0401c
Author: Wilco Dijkstra <[email protected]>
Date:   Tue Jan 13 16:21:05 2026 +0000

    AArch64: Use anchors for FP constants [PR 121240]
    
    Use anchors for FP constants - instead of using mergeable sections which blocks
    anchors, load FP constants from the constdata section.  To avoid the anchor
    loads being deoptimized later, ensure the cost of a CONST_DOUBLE is larger than
    the cost of a MEM that loads it from constdata.  Codesize is slightly smaller,
    performance on SPECFP2017 is ~0.30% better.
    
    gcc:
            PR target/121240
            * config/aarch64/aarch64.md (mov<mode>): Expand FP immediates early.
            * config/aarch64/aarch64.cc (aarch64_select_rtx_section): Force
            immediates <= 8 bytes to constdata.
            (aarch64_rtx_costs): Increase cost of CONST_DOUBLE loaded from memory.
    
    gcc/testsuite:
            PR target/121240
            * gcc.target/aarch64/dbl_mov_immediate_1.c: Adjust test.
            * gcc.target/aarch64/pr63304_1.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64.cc                          | 12 +++++++++---
 gcc/config/aarch64/aarch64.md                          | 12 ++++++++++++
 gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c |  4 ++--
 gcc/testsuite/gcc.target/aarch64/pr63304_1.c           |  2 +-
 4 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 4784d3dadc5c..3a453ad4918d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -14315,6 +14315,10 @@ aarch64_select_rtx_section (machine_mode mode,
   if (aarch64_can_use_per_function_literal_pools_p ())
     return function_section (current_function_decl);
 
+  /* When using anchors for constants use the readonly section.  */
+  if (known_le (GET_MODE_SIZE (mode), 8))
+    return readonly_data_section;
+
   return default_elf_select_rtx_section (mode, x, align);
 }
 
@@ -15269,11 +15273,13 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
            *cost += extra_cost->fp[mode == DFmode || mode == DDmode].fpconst;
          else if (!aarch64_float_const_zero_rtx_p (x))
            {
-             /* This will be a load from memory.  */
+             /* Load from constdata - the cost of CONST_DOUBLE should be
+                higher than the cost of a MEM so that later optimizations
+                won't deoptimize an anchor load into a non-anchor load.  */
              if (mode == DFmode || mode == DDmode)
-               *cost += extra_cost->ldst.loadd;
+               *cost += extra_cost->ldst.loadd + 1;
              else
-               *cost += extra_cost->ldst.loadf;
+               *cost += extra_cost->ldst.loadf + 1;
            }
          else
            /* Otherwise this is +0.0.  We get this using MOVI d0, #0
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 71458bf78f5c..4445208bf92c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1960,6 +1960,18 @@
        emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
        DONE;
       }
+
+    /* Expand into a literal load using anchors.  */
+    if (GET_CODE (operands[1]) == CONST_DOUBLE
+       && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
+       && !aarch64_float_const_representable_p (operands[1])
+       && !aarch64_float_const_zero_rtx_p (operands[1])
+       && !aarch64_float_const_rtx_p (operands[1]))
+      {
+       operands[1] = force_const_mem (<MODE>mode, operands[1]);
+       emit_move_insn (operands[0], operands[1]);
+       DONE;
+      }
   }
 )
 
diff --git a/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c b/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
index 8332035d80b9..fac32df8c246 100644
--- a/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
@@ -41,8 +41,8 @@ double d4(void)
 
 /* { dg-final { scan-assembler-times "movi\td\[0-9\]+, #?0"                 1 } } */
 
-/* { dg-final { scan-assembler-times "adrp\tx\[0-9\]+, \.LC\[0-9\]"         2 } } */
-/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+, \\\[x\[0-9\], #:lo12:\.LC\[0-9\]\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "adrp\tx\[0-9\]+, "         2 } } */
+/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+, \\\[x\[0-9\], #:lo12:" 2 } } */
 
 /* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, 1\\\.5e\\\+0"        1 } } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr63304_1.c b/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
index 5d519d817ccd..134fd469b87c 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
@@ -45,4 +45,4 @@ cal3 (double a)
     return 1;
 }
 
-/* { dg-final { scan-assembler-times "adrp" 6 } } */
+/* { dg-final { scan-assembler-times "adrp" 4 } } */

Reply via email to