Hi,
This is a v3 patch which is rebased on top of the SME changes.
Otherwise it is the same as v2, posted here:
https://gcc.gnu.org/pipermail/gcc-patches/2023-December/639367.html
Bootstrapped/regtested as a series on aarch64-linux-gnu, OK for trunk?
Thanks,
Alex
-- >8 --
Thus far the writeback forms of ldp/stp have been exclusively used in
prologue and epilogue code for saving/restoring of registers to/from the
stack.
As such, forms of ldp/stp that weren't needed for prologue/epilogue code
weren't supported by the aarch64 backend. This patch generalizes the
load/store pair writeback patterns to allow:
- Base registers other than the stack pointer.
- Modes that weren't previously supported.
- Combinations of distinct modes provided they have the same size.
- Pre/post variants that weren't previously needed in prologue/epilogue
code.
We make quite some effort to avoid a combinatorial explosion in the
number of patterns generated (and those in the source) by making
extensive use of special predicates.
An updated version of the upcoming ldp/stp pass can generate the
writeback forms, so this patch is motivated by that.
This patch doesn't add zero-extending or sign-extending forms of the
writeback patterns; that is left for future work.
gcc/ChangeLog:
* config/aarch64/aarch64-protos.h (aarch64_ldpstp_operand_mode_p):
Declare.
* config/aarch64/aarch64.cc (aarch64_gen_storewb_pair): Build RTL
directly instead of invoking named pattern.
(aarch64_gen_loadwb_pair): Likewise.
(aarch64_ldpstp_operand_mode_p): New.
* config/aarch64/aarch64.md (loadwb_pair_): Replace
with
...
(*loadwb_post_pair_): ... this. Generalize as described
in cover letter.
(loadwb_pair_): Delete (superseded by the
above).
(*loadwb_post_pair_16): New.
(*loadwb_pre_pair_): New.
(loadwb_pair_): Delete.
(*loadwb_pre_pair_16): New.
(storewb_pair_): Replace with ...
(*storewb_pre_pair_): ... this. Generalize as
described in cover letter.
(*storewb_pre_pair_16): New.
(storewb_pair_): Delete.
(*storewb_post_pair_): New.
(storewb_pair_): Delete.
(*storewb_post_pair_16): New.
* config/aarch64/predicates.md (aarch64_mem_pair_operator): New.
(pmode_plus_operator): New.
(aarch64_ldp_reg_operand): New.
(aarch64_stp_reg_operand): New.
diff --git a/gcc/config/aarch64/aarch64-protos.h
b/gcc/config/aarch64/aarch64-protos.h
index 42f7bfad5cb..ee0f0a18541 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1041,6 +1041,7 @@ bool aarch64_operands_ok_for_ldpstp (rtx *, bool,
machine_mode);
bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode);
bool aarch64_mem_ok_with_ldpstp_policy_model (rtx, bool, machine_mode);
void aarch64_swap_ldrstr_operands (rtx *, bool);
+bool aarch64_ldpstp_operand_mode_p (machine_mode);
extern void aarch64_asm_output_pool_epilogue (FILE *, const char *,
tree, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d870973dcd6..baa2b6ca3f7 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8097,23 +8097,15 @@ static rtx
aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
HOST_WIDE_INT adjustment)
{
- switch (mode)
-{
-case E_DImode:
- return gen_storewb_pairdi_di (base, base, reg, reg2,
- GEN_INT (-adjustment),
- GEN_INT (UNITS_PER_WORD - adjustment));
-case E_DFmode:
- return gen_storewb_pairdf_di (base, base, reg, reg2,
- GEN_INT (-adjustment),
- GEN_INT (UNITS_PER_WORD - adjustment));
-case E_TFmode:
- return gen_storewb_pairtf_di (base, base, reg, reg2,
- GEN_INT (-adjustment),
- GEN_INT (UNITS_PER_VREG - adjustment));
-default:
- gcc_unreachable ();
-}
+ rtx new_base = plus_constant (Pmode, base, -adjustment);
+ rtx mem = gen_frame_mem (mode, new_base);
+ rtx mem2 = adjust_address_nv (mem, mode, GET_MODE_SIZE (mode));
+
+ return gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (3,
+ gen_rtx_SET (base, new_base),
+ gen_rtx_SET (mem, reg),
+ gen_rtx_SET (mem2, reg2)));
}
/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
@@ -8145,20 +8137,15 @@ static rtx
aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
HOST_WIDE_INT adjustment)
{
- switch (mode)
-{
-case E_DImode:
- return gen_loadwb_pairdi_di (base, base,