Richard Henderson <[email protected]> writes:
> The save/restore_stack_nonlocal patterns passed a DImode rtx
> to gen_tbranch_neqi3 for a QImode compare. The tbranch expander
> did not do what it said on the tin, that is: emit TBNZ.
> It only made it as far as AND+CMP+B.cond.
Yeah, that was done to allow ifcombine to have a go first
(g:17ae956c0fa6baac3d22764019d5dd5ebf5c2b11). But the original
pattern was restricted in g:8e26ac4749c5ddf827e18a846b1010b091f76fa2
to "bit 0 of a QI or HI" that we have now.
If all tests pass without the expander then I suppose we at least need
a new test to justify the expander's existence.
It doesn't look like any other target has implemented the tbranch optab,
so if we do remove the expander, I suppose we should remove the optab
itself too (separately).
The fix for the save/restore_stack_nonlocal mode mismatch is ok
either way though.
Thanks,
Richard
>
> But since we're seeding r16 with 1, GCSEnabled will clear the
> only set bit in r16, so we can use CBNZ instead of TBNZ.
>
> gcc:
> * config/aarch64/aarch64.md (tbranch_<EQL><SHORT>3): Remove.
> (save_stack_nonlocal): Use aarch64_gen_compare_zero_and_branch.
> (restore_stack_nonlocal): Likewise.
> ---
> gcc/config/aarch64/aarch64.md | 32 ++++---------------
> .../gcc.target/aarch64/gcs-nonlocal-3.c | 2 +-
> 2 files changed, 8 insertions(+), 26 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 3da5bc2122d..9aea00cf45a 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1017,24 +1017,6 @@
> ;; Test bit and branch
> ;; -------------------------------------------------------------------
>
> -(define_expand "tbranch_<code><mode>3"
> - [(set (pc) (if_then_else (EQL
> - (match_operand:SHORT 0 "register_operand")
> - (match_operand 1 "const0_operand"))
> - (label_ref (match_operand 2 ""))
> - (pc)))]
> - ""
> -{
> - rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
> - rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);
> - rtx val = gen_int_mode (HOST_WIDE_INT_1U << UINTVAL (operands[1]),
> - <MODE>mode);
> - emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
> - operands[1] = const0_rtx;
> - operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
> - operands[1]);
> -})
> -
> (define_insn "@aarch64_tbz<optab><ALLI:mode><GPI:mode>"
> [(set (pc) (if_then_else (EQL
> (zero_extract:GPI
> @@ -1413,16 +1395,16 @@
> /* Save GCS with code like
> mov x16, 1
> chkfeat x16
> - tbnz x16, 0, .L_done
> + cbnz x16, .L_done
> mrs tmp, gcspr_el0
> str tmp, [%0, 8]
> .L_done: */
>
> - rtx done_label = gen_label_rtx ();
> + auto done_label = gen_label_rtx ();
> rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
> emit_move_insn (r16, const1_rtx);
> emit_insn (gen_aarch64_chkfeat ());
> - emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
> + emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16,
> done_label));
> rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE
> (Pmode));
> rtx gcs = gen_reg_rtx (Pmode);
> emit_insn (gen_aarch64_load_gcspr (gcs));
> @@ -1445,7 +1427,7 @@
> /* Restore GCS with code like
> mov x16, 1
> chkfeat x16
> - tbnz x16, 0, .L_done
> + cbnz x16, .L_done
> ldr tmp1, [%1, 8]
> mrs tmp2, gcspr_el0
> subs tmp2, tmp1, tmp2
> @@ -1456,12 +1438,12 @@
> b.ne .L_loop
> .L_done: */
>
> - rtx loop_label = gen_label_rtx ();
> - rtx done_label = gen_label_rtx ();
> + auto loop_label = gen_label_rtx ();
> + auto done_label = gen_label_rtx ();
> rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
> emit_move_insn (r16, const1_rtx);
> emit_insn (gen_aarch64_chkfeat ());
> - emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
> + emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16,
> done_label));
> rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE
> (Pmode));
> rtx gcs_old = gen_reg_rtx (Pmode);
> emit_move_insn (gcs_old, gcs_slot);
> diff --git a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c
> b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c
> index e2391555150..7a76b14e1e7 100644
> --- a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c
> +++ b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-3.c
> @@ -7,7 +7,7 @@ void run (void (*)());
> ** bar.0:
> ** ...
> ** hint 40 // chkfeat x16
> -** tbnz w16, 0, (\.L[0-9]+)
> +** cbnz x16, (\.L[0-9]+)
> ** ...
> ** mrs (x[0-9]+), s3_3_c2_c5_1 // gcspr_el0
> ** subs x[0-9]+, x[0-9]+, \2