https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113613

--- Comment #6 from Alex Coplan <acoplan at gcc dot gnu.org> ---
FWIW, if I move ldp_fusion1 before early_ra, with:

diff --git a/gcc/config/aarch64/aarch64-passes.def b/gcc/config/aarch64/aarch64-passes.def
index 769d48f4faa..3853f6bf7a4 100644
--- a/gcc/config/aarch64/aarch64-passes.def
+++ b/gcc/config/aarch64/aarch64-passes.def
@@ -18,6 +18,7 @@
    along with GCC; see the file COPYING3.  If not see
    <http://www.gnu.org/licenses/>.  */

+INSERT_PASS_BEFORE (pass_sched, 1, pass_ldp_fusion);
 INSERT_PASS_BEFORE (pass_sched, 1, pass_aarch64_early_ra);
 INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering);
 INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation);
@@ -25,5 +26,4 @@ INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstat
 INSERT_PASS_AFTER (pass_machine_reorg, 1, pass_tag_collision_avoidance);
 INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti);
 INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion);
-INSERT_PASS_BEFORE (pass_early_remat, 1, pass_ldp_fusion);
 INSERT_PASS_BEFORE (pass_peephole2, 1, pass_ldp_fusion);

we get:

f:
.LFB0:
        .cfi_startproc
        adrp    x0, .LANCHOR0
        add     x0, x0, :lo12:.LANCHOR0
        ldp     d31, d30, [x0]
        ldp     d29, d28, [x0, 32]
        fadd    v29.2s, v31.2s, v29.2s
        fadd    v28.2s, v30.2s, v28.2s
        stp     d29, d28, [x0]
        ret

Note that this uses more registers, though, so it's not necessarily a clear
win in the general case (particularly if register pressure is already high).

Reply via email to