The rtl description of signed/unsigned overflow from subtract
was fine, as far as it goes -- we have CC_Cmode and CC_Vmode
that indicate that only those particular bits are valid.
However, it's not clear how to extend that description to
handle signed comparison, where N == V (GE) N != V (LT) are
the only valid bits.
Using an UNSPEC means that we can unify all 3 usages without
fear that combine will try to infer anything from the rtl.
It also means we need far fewer variants when various inputs
have constants propagated in, and the rtl folds.
Accept -1 for the second input by using ADCS.
* config/aarch64/aarch64.md (UNSPEC_SBCS): New.
(cmp3_carryin): New expander.
(sub3_carryin_cmp): New expander.
(*cmp3_carryin): New pattern.
(*cmp3_carryin_0): New pattern.
(*sub3_carryin_cmp): New pattern.
(*sub3_carryin_cmp_0): New pattern.
(subvti4, usubvti4, negvti3): Use subdi3_carryin_cmp.
(negvdi_carryinV): Remove.
(usub3_carryinC): Remove.
(*usub3_carryinC): Remove.
(*usub3_carryinC_z1): Remove.
(*usub3_carryinC_z2): Remove.
(sub3_carryinV): Remove.
(*sub3_carryinV): Remove.
(*sub3_carryinV_z2): Remove.
* config/aarch64/predicates.md (aarch64_reg_zero_minus1): New.
---
gcc/config/aarch64/aarch64.md| 217 +--
gcc/config/aarch64/predicates.md | 7 +
2 files changed, 94 insertions(+), 130 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 532c114a42e..564dea390be 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -281,6 +281,7 @@
UNSPEC_GEN_TAG_RND ; Generate a random 4-bit MTE tag.
UNSPEC_TAG_SPACE ; Translate address to MTE tag address space.
UNSPEC_LD1RO
+UNSPEC_SBCS
])
(define_c_enum "unspecv" [
@@ -2942,7 +2943,7 @@
aarch64_expand_addsubti (operands[0], operands[1], operands[2],
CODE_FOR_subvdi_insn,
CODE_FOR_subdi3_compare1,
- CODE_FOR_subdi3_carryinV);
+ CODE_FOR_subdi3_carryin_cmp);
aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
DONE;
})
@@ -2957,7 +2958,7 @@
aarch64_expand_addsubti (operands[0], operands[1], operands[2],
CODE_FOR_subdi3_compare1,
CODE_FOR_subdi3_compare1,
- CODE_FOR_usubdi3_carryinC);
+ CODE_FOR_subdi3_carryin_cmp);
aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]);
DONE;
})
@@ -2968,12 +2969,14 @@
(label_ref (match_operand 2 "" ""))]
""
{
-emit_insn (gen_negdi_carryout (gen_lowpart (DImode, operands[0]),
- gen_lowpart (DImode, operands[1])));
-emit_insn (gen_negvdi_carryinV (gen_highpart (DImode, operands[0]),
- gen_highpart (DImode, operands[1])));
-aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]);
+rtx op0l = gen_lowpart (DImode, operands[0]);
+rtx op1l = gen_lowpart (DImode, operands[1]);
+rtx op0h = gen_highpart (DImode, operands[0]);
+rtx op1h = gen_highpart (DImode, operands[1]);
+emit_insn (gen_negdi_carryout (op0l, op1l));
+emit_insn (gen_subdi3_carryin_cmp (op0h, const0_rtx, op1h));
+aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]);
DONE;
}
)
@@ -2989,23 +2992,6 @@
[(set_attr "type" "alus_sreg")]
)
-(define_insn "negvdi_carryinV"
- [(set (reg:CC_V CC_REGNUM)
- (compare:CC_V
-(neg:TI (plus:TI
- (ltu:TI (reg:CC CC_REGNUM) (const_int 0))
- (sign_extend:TI (match_operand:DI 1 "register_operand" "r"
-(sign_extend:TI
- (neg:DI (plus:DI (ltu:DI (reg:CC CC_REGNUM) (const_int 0))
- (match_dup 1))
- (set (match_operand:DI 0 "register_operand" "=r")
- (neg:DI (plus:DI (ltu:DI (reg:CC CC_REGNUM) (const_int 0))
-(match_dup 1]
- ""
- "ngcs\\t%0, %1"
- [(set_attr "type" "alus_sreg")]
-)
-
(define_insn "*sub3_compare0"
[(set (reg:CC_NZ CC_REGNUM)
(compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "rk")
@@ -3370,134 +3356,105 @@
[(set_attr "type" "adc_reg")]
)
-(define_expand "usub3_carryinC"
+(define_expand "sub3_carryin_cmp"
[(parallel
- [(set (reg:CC CC_REGNUM)
- (compare:CC
-(zero_extend:
- (match_operand:GPI 1 "aarch64_reg_or_zero"))
-(plus:
- (zero_extend:
-(match_operand:GPI 2 "register_operand"))
- (ltu: (reg:CC CC_REGNUM) (const_int 0)
- (set (match_operand:GPI 0 "register_operand")
- (minus:GPI
-(minus:GPI (match_dup 1) (match_dup 2))
-(ltu:GPI (reg:CC CC_REGNUM) (const_int 0])]
+[(set (match_dup 3)
+