Under the new pro/epi code, we could also utilize our store write-back to optimize
stack adjustment when there is no frame pointer.
* if there is a candidate reg pair and the adjustment amount is less than 512, then
we could use aarch64's paired store write-back.
* if there is only a single candidate reg and the adjustment amount is less than
256, we could use aarch64's single store write-back.
* otherwise use an explicit subtraction to finish the stack adjustment.
Improved testcases:
gcc.target/aarch64/test_frame_1.c
gcc.target/aarch64/test_frame_10.c
gcc.target/aarch64/test_frame_2.c
gcc.target/aarch64/test_frame_4.c
gcc.target/aarch64/test_frame_6.c
gcc.target/aarch64/test_frame_7.c
gcc.target/aarch64/test_frame_8.c
gcc.target/aarch64/test_fp_attribute_1.c
ok for install?
gcc/
* config/aarch64/aarch64.c (aarch64_pushwb_single_reg): New function.
(aarch64_expand_prologue): Optimize prologue when !frame_pointer_needed.
gcc/testsuite/
* gcc.target/aarch64/test_frame_1.c: Match optimized instruction sequences.
* gcc.target/aarch64/test_frame_10.c: Likewise.
* gcc.target/aarch64/test_frame_2.c: Likewise.
* gcc.target/aarch64/test_frame_4.c: Likewise.
* gcc.target/aarch64/test_frame_6.c: Likewise.
* gcc.target/aarch64/test_frame_7.c: Likewise.
* gcc.target/aarch64/test_frame_8.c: Likewise.
* gcc.target/aarch64/test_fp_attribute_1.c: Likewise.
From e3ab087747c2f4ddeef0482983b2ebc3bbdc131f Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.w...@arm.com>
Date: Tue, 17 Jun 2014 22:24:44 +0100
Subject: [PATCH 17/19] [AArch64/GCC][18/20] Optimize prologue when there is
no frame pointer
Under the new pro/epi code, we could also utilize our store write-back to optimize
stack adjustment when there is no frame pointer.
* if there is a candidate reg pair and the adjustment amount is less than 512, then
we could use aarch64's paired store write-back.
* if there is only a single candidate reg and the adjustment amount is less than
256, we could use aarch64's single store write-back.
* otherwise use an explicit subtraction to finish the stack adjustment.
Improved testcases:
gcc.target/aarch64/test_frame_1.c
gcc.target/aarch64/test_frame_10.c
gcc.target/aarch64/test_frame_2.c
gcc.target/aarch64/test_frame_4.c
gcc.target/aarch64/test_frame_6.c
gcc.target/aarch64/test_frame_7.c
gcc.target/aarch64/test_frame_8.c
gcc.target/aarch64/test_fp_attribute_1.c
2014-06-16 Jiong Wang <jiong.w...@arm.com>
Marcus Shawcroft <marcus.shawcr...@arm.com>
gcc/
* config/aarch64/aarch64.c (aarch64_pushwb_single_reg): New function.
(aarch64_expand_prologue): Optimize prologue when !frame_pointer_needed.
gcc/testsuite/
* gcc.target/aarch64/test_frame_1.c: Match optimized instruction sequences.
* gcc.target/aarch64/test_frame_10.c: Likewise.
* gcc.target/aarch64/test_frame_2.c: Likewise.
* gcc.target/aarch64/test_frame_4.c: Likewise.
* gcc.target/aarch64/test_frame_6.c: Likewise.
* gcc.target/aarch64/test_frame_7.c: Likewise.
* gcc.target/aarch64/test_frame_8.c: Likewise.
* gcc.target/aarch64/test_fp_attribute_1.c: Likewise.
---
gcc/config/aarch64/aarch64.c | 58 +++++++++++++++-----
.../gcc.target/aarch64/test_fp_attribute_1.c | 2 +-
gcc/testsuite/gcc.target/aarch64/test_frame_1.c | 5 +-
gcc/testsuite/gcc.target/aarch64/test_frame_10.c | 5 +-
gcc/testsuite/gcc.target/aarch64/test_frame_2.c | 6 +-
gcc/testsuite/gcc.target/aarch64/test_frame_4.c | 5 +-
gcc/testsuite/gcc.target/aarch64/test_frame_6.c | 5 +-
gcc/testsuite/gcc.target/aarch64/test_frame_7.c | 5 +-
gcc/testsuite/gcc.target/aarch64/test_frame_8.c | 5 +-
9 files changed, 74 insertions(+), 22 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 26d5fba..365fdd4 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1927,6 +1927,22 @@ aarch64_next_callee_save (unsigned regno, unsigned limit)
return regno;
}
+static void
+aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
+ HOST_WIDE_INT adjustment)
+ {
+ rtx base_rtx = stack_pointer_rtx;
+ rtx insn, reg, mem;
+
+ reg = gen_rtx_REG (mode, regno);
+ mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
+ plus_constant (Pmode, base_rtx, -adjustment));
+ mem = gen_rtx_MEM (mode, mem);
+
+ insn = emit_move_insn (mem, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+}
+
static rtx
aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
HOST_WIDE_INT adjustment)
@@ -2276,11 +2292,10 @@ aarch64_expand_prologue (void)
{
bool skip_wb = false;
- /* Save the frame pointer and lr if the frame pointer is needed
- first. Make the frame pointer point to the location of the
- old frame pointer on the stack. */
if (frame_pointer_needed)
{
+ skip_wb = true;
+
if (fp_offset)
{
insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
@@ -2288,12 +2303,11 @@ aarch64_expand_prologue (void)
RTX_FRAME_RELATED_P (insn) = 1;
aarch64_set_frame_expr (gen_rtx_SET
(Pmode, stack_pointer_rtx,
- gen_rtx_MINUS (Pmode,
- stack_pointer_rtx,
+ gen_rtx_MINUS (Pmode, stack_pointer_rtx,
GEN_INT (offset))));
aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
- R30_REGNUM, skip_wb);
+ R30_REGNUM, false);
}
else
aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
@@ -2311,20 +2325,36 @@ aarch64_expand_prologue (void)
RTX_FRAME_RELATED_P (insn) = 1;
insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
hard_frame_pointer_rtx));
-
- aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R28_REGNUM,
- skip_wb);
}
else
{
- insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
- GEN_INT (-offset)));
- RTX_FRAME_RELATED_P (insn) = 1;
+ unsigned reg1 = cfun->machine->frame.wb_candidate1;
+ unsigned reg2 = cfun->machine->frame.wb_candidate2;
- aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
- skip_wb);
+ if (fp_offset
+ || reg1 == FIRST_PSEUDO_REGISTER
+ || (reg2 == FIRST_PSEUDO_REGISTER
+ && offset >= 256))
+ {
+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
+ GEN_INT (-offset)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ {
+ enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
+
+ skip_wb = true;
+
+ if (reg2 == FIRST_PSEUDO_REGISTER)
+ aarch64_pushwb_single_reg (mode1, reg1, offset);
+ else
+ aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
+ }
}
+ aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
+ skip_wb);
aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
skip_wb);
}
diff --git a/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c b/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c
index 7538250..960174a 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c
@@ -21,6 +21,6 @@ non_leaf_2 (void)
leaf ();
}
-/* { dg-final { scan-assembler-times "str\tx30, \\\[sp\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_1.c b/gcc/testsuite/gcc.target/aarch64/test_frame_1.c
index feea7a2..e9d04aa 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_1.c
@@ -6,9 +6,12 @@
* optimized code should use "str !" for stack adjustment. */
/* { dg-do run } */
-/* { dg-options "-O2 -fomit-frame-pointer" } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
#include "test_frame_common.h"
t_frame_pattern (test1, 200, )
t_frame_run (test1)
+
+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
index 2892c5f..b646a71 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
@@ -8,9 +8,12 @@
the first subtractions could be optimized into "stp !". */
/* { dg-do run } */
-/* { dg-options "-O2 -fomit-frame-pointer" } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
#include "test_frame_common.h"
t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10])
t_frame_run (test10)
+
+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
index aa15dae..b972664 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
@@ -6,9 +6,13 @@
* optimized code should use "stp !" for stack adjustment. */
/* { dg-do run } */
-/* { dg-options "-O2 -fomit-frame-pointer" } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
#include "test_frame_common.h"
t_frame_pattern (test2, 200, "x19")
t_frame_run (test2)
+
+
+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
index c45e740..5a9a919 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
@@ -6,9 +6,12 @@
* we can use "stp !" to optimize stack adjustment. */
/* { dg-do run } */
-/* { dg-options "-O2 -fomit-frame-pointer" } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
#include "test_frame_common.h"
t_frame_pattern (test4, 400, "x19")
t_frame_run (test4)
+
+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_6.c b/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
index 54f646b..6056f57 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
@@ -7,9 +7,12 @@
the second subtraction should use "str !". */
/* { dg-do run } */
-/* { dg-options "-O2 -fomit-frame-pointer" } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
#include "test_frame_common.h"
t_frame_pattern (test6, 700, )
t_frame_run (test6)
+
+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
index aa97bc0..991860c 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
@@ -7,9 +7,12 @@
the second subtraction should use "stp !". */
/* { dg-do run } */
-/* { dg-options "-O2 -fomit-frame-pointer" } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
#include "test_frame_common.h"
t_frame_pattern (test7, 700, "x19")
t_frame_run (test7)
+
+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_8.c b/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
index f75f080..4a4d93b 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
@@ -5,9 +5,12 @@
* number of callee-saved reg == 1. */
/* { dg-do run } */
-/* { dg-options "-O2 -fomit-frame-pointer" } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
#include "test_frame_common.h"
t_frame_pattern_outgoing (test8, 700, , 8, a[8])
t_frame_run (test8)
+
+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--
1.7.9.5