On Mon, Oct 23, 2017 at 12:27:15PM +0200, Uros Bizjak wrote:
> On Mon, Oct 23, 2017 at 12:09 PM, Jakub Jelinek <[email protected]> wrote:
> > On Sun, Oct 22, 2017 at 08:04:28PM +0200, Uros Bizjak wrote:
> >> Hello!
> >>
> >> In PR 82628 Jakub figured out that insn patterns that consume carry
> >> flag were not 100% correct. Due to this issue, combine is able to
> >> simplify various CC_REG propagations that result in invalid code.
> >>
> >> Attached patch fixes (well, mitigates) the above problem by splitting
> >> the double-mode compare after the reload, in the same way other
> >> *_doubleword patterns are handled from "the beginning of the time".
> >
> > I'm afraid this is going to haunt us sooner or later, combine isn't the
> > only pass that uses simplify-rtx.c infrastructure heavily and when we lie
> > in the RTL pattern, eventually something will be simplified wrongly.
> >
> > So, at least we'd need to use UNSPEC for the pattern, like (only lightly
> > tested so far) below.
>
> I agree with the above. Patterns that consume Carry flag are now
> marked with (plus (ltu (...)), but effectively, they behave like
> unspecs. So, I see no problem to change all SBB and ADC to unspec at
> once, similar to the change you proposed in the patch.
So like this (addcarry/subborrow defered to a separate patch)?
Or do you want to use UNSPEC even for the unsigned comparison case,
i.e. from the patch remove the predicates.md/constraints.md part,
sub<mode>3_carry_ccc{,_1} and anything related to that?
As for addcarry/subborrow, the problem is that we expect in the pr67317*
tests that combine is able to notice that the CF setter sets CF to
unconditional 0 and matches the pattern. With the patch I wrote
we end up with the combiner trying to match an insn where the CCC
is set from a TImode comparison:
(parallel [
(set (reg:CC 17 flags)
(compare:CC (zero_extend:TI (plus:DI (reg/v:DI 92 [ a ])
(reg/v:DI 94 [ c ])))
(zero_extend:TI (reg/v:DI 94 [ c ]))))
(set (reg:DI 98)
(plus:DI (reg/v:DI 92 [ a ])
(reg/v:DI 94 [ c ])))
])
So, either we need a define_insn_and_split pattern that would deal with
that (for UNSPEC it would be the same thing, have a define_insn_and_split
that would replace the (ltu...) with (const_int 0)), or perhaps be smarter
during expansion, if we see the first argument is constant 0, expand it
like a normal add instruction with CC setter.
2017-10-23 Jakub Jelinek <[email protected]>
PR target/82628
* config/i386/predicates.md (x86_64_dwzext_immediate_operand): New.
* config/i386/constraints.md (Wf): New constraint.
* config/i386/i386.md (UNSPEC_SBB): New unspec.
(cmp<dwi>_doubleword): Removed.
(sub<mode>3_carry_ccc, *sub<mode>3_carry_ccc_1): New patterns.
(sub<mode>3_carry_ccgz): Use unspec instead of compare.
* config/i386/i386.c (ix86_expand_branch) <case E_TImode>: Don't
expand with cmp<dwi>_doubleword. For LTU and GEU use
sub<mode>3_carry_ccc instead of sub<mode>3_carry_ccgz and use CCCmode.
--- gcc/config/i386/predicates.md.jj 2017-10-23 12:00:13.899355249 +0200
+++ gcc/config/i386/predicates.md 2017-10-23 12:52:20.696576114 +0200
@@ -366,6 +366,31 @@ (define_predicate "x86_64_hilo_int_opera
}
})
+;; Return true if VALUE is a constant integer whose value is
+;; x86_64_immediate_operand value zero extended from word mode to mode.
+(define_predicate "x86_64_dwzext_immediate_operand"
+ (match_code "const_int,const_wide_int")
+{
+ switch (GET_CODE (op))
+ {
+ case CONST_INT:
+ if (!TARGET_64BIT)
+ return UINTVAL (op) <= HOST_WIDE_INT_UC (0xffffffff);
+ return UINTVAL (op) <= HOST_WIDE_INT_UC (0x7fffffff);
+
+ case CONST_WIDE_INT:
+ if (!TARGET_64BIT)
+ return false;
+ return (CONST_WIDE_INT_NUNITS (op) == 2
+ && CONST_WIDE_INT_ELT (op, 1) == 0
+ && (trunc_int_for_mode (CONST_WIDE_INT_ELT (op, 0), SImode)
+ == (HOST_WIDE_INT) CONST_WIDE_INT_ELT (op, 0)));
+
+ default:
+ gcc_unreachable ();
+ }
+})
+
;; Return true if size of VALUE can be stored in a sign
;; extended immediate field.
(define_predicate "x86_64_immediate_size_operand"
--- gcc/config/i386/constraints.md.jj 2017-10-23 12:00:13.850355874 +0200
+++ gcc/config/i386/constraints.md 2017-10-23 12:52:20.697576102 +0200
@@ -332,6 +332,11 @@ (define_constraint "Wd"
of it satisfies the e constraint."
(match_operand 0 "x86_64_hilo_int_operand"))
+(define_constraint "Wf"
+ "32-bit signed integer constant zero extended from word size
+ to double word size."
+ (match_operand 0 "x86_64_dwzext_immediate_operand"))
+
(define_constraint "Z"
"32-bit unsigned integer constant, or a symbolic reference known
to fit that range (for immediate operands in zero-extending x86-64
--- gcc/config/i386/i386.md.jj 2017-10-23 12:51:19.350356044 +0200
+++ gcc/config/i386/i386.md 2017-10-23 12:52:20.701576051 +0200
@@ -112,6 +112,7 @@ (define_c_enum "unspec" [
UNSPEC_STOS
UNSPEC_PEEPSIB
UNSPEC_INSN_FALSE_DEP
+ UNSPEC_SBB
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
@@ -1273,26 +1274,6 @@ (define_expand "cmp<mode>_1"
(compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
(match_operand:SWI48 1 "<general_operand>")))])
-(define_insn_and_split "cmp<dwi>_doubleword"
- [(set (reg:CCGZ FLAGS_REG)
- (compare:CCGZ
- (match_operand:<DWI> 1 "register_operand" "0")
- (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "ro<di>")))
- (clobber (match_scratch:<DWI> 0 "=r"))]
- ""
- "#"
- "reload_completed"
- [(set (reg:CC FLAGS_REG)
- (compare:CC (match_dup 1) (match_dup 2)))
- (parallel [(set (reg:CCGZ FLAGS_REG)
- (compare: CCGZ
- (match_dup 4)
- (plus:DWIH
- (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
- (match_dup 5))))
- (clobber (match_dup 3))])]
- "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0],
&operands[3]);")
-
(define_insn "*cmp<mode>_ccno_1"
[(set (reg FLAGS_REG)
(compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>")
@@ -6911,13 +6892,46 @@ (define_insn "*subsi3_carry_zext"
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
-(define_insn "*sub<mode>3_carry_ccgz"
+(define_insn "sub<mode>3_carry_ccc"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
+ (plus:<DWI>
+ (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
+ (zero_extend:<DWI>
+ (match_operand:DWIH 2 "x86_64_sext_operand" "rmWe")))))
+ (clobber (match_scratch:DWIH 0 "=r"))]
+ ""
+ "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*sub<mode>3_carry_ccc_1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
+ (plus:<DWI>
+ (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
+ (match_operand:<DWI> 2 "x86_64_dwzext_immediate_operand" "Wf"))))
+ (clobber (match_scratch:DWIH 0 "=r"))]
+ ""
+{
+ operands[3] = simplify_subreg (<MODE>mode, operands[2], <DWI>mode, 0);
+ return "sbb{<imodesuffix>}\t{%3, %0|%0, %3}";
+}
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+;; The sign flag is set from the
+;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2)))
+;; result, the overflow flag likewise, but the overflow flag is also
+;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows.
+(define_insn "sub<mode>3_carry_ccgz"
[(set (reg:CCGZ FLAGS_REG)
- (compare:CCGZ
- (match_operand:DWIH 1 "register_operand" "0")
- (plus:DWIH
- (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
- (match_operand:DWIH 2 "x86_64_general_operand" "rme"))))
+ (unspec:CCGZ [(match_operand:DWIH 1 "register_operand" "0")
+ (match_operand:DWIH 2 "x86_64_general_operand" "rme")
+ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))]
+ UNSPEC_SBB))
(clobber (match_scratch:DWIH 0 "=r"))]
""
"sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
--- gcc/config/i386/i386.c.jj 2017-10-23 12:51:19.349356057 +0200
+++ gcc/config/i386/i386.c 2017-10-23 12:52:20.711575924 +0200
@@ -22378,27 +22378,45 @@ ix86_expand_branch (enum rtx_code code,
switch (code)
{
case LE: case LEU: case GT: case GTU:
- std::swap (op0, op1);
+ std::swap (lo[0], lo[1]);
+ std::swap (hi[0], hi[1]);
code = swap_condition (code);
/* FALLTHRU */
case LT: case LTU: case GE: case GEU:
{
- rtx (*cmp_insn) (rtx, rtx, rtx);
+ rtx (*cmp_insn) (rtx, rtx);
+ rtx (*sbb_insn) (rtx, rtx, rtx);
+ bool uns = (code == LTU || code == GEU);
if (TARGET_64BIT)
- cmp_insn = gen_cmpti_doubleword;
+ {
+ cmp_insn = gen_cmpdi_1;
+ sbb_insn
+ = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz;
+ }
else
- cmp_insn = gen_cmpdi_doubleword;
+ {
+ cmp_insn = gen_cmpsi_1;
+ sbb_insn
+ = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz;
+ }
+
+ if (!nonimmediate_operand (lo[0], submode))
+ lo[0] = force_reg (submode, lo[0]);
+ if (!x86_64_general_operand (lo[1], submode))
+ lo[1] = force_reg (submode, lo[1]);
+
+ if (!register_operand (hi[0], submode))
+ hi[0] = force_reg (submode, hi[0]);
+ if ((uns && !nonimmediate_operand (hi[1], submode))
+ || (!uns && !x86_64_general_operand (hi[1], submode)))
+ hi[1] = force_reg (submode, hi[1]);
- if (!register_operand (op0, mode))
- op0 = force_reg (mode, op0);
- if (!x86_64_hilo_general_operand (op1, mode))
- op1 = force_reg (mode, op1);
+ emit_insn (cmp_insn (lo[0], lo[1]));
+ emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1]));
- emit_insn (cmp_insn (gen_rtx_SCRATCH (mode), op0, op1));
-
- tmp = gen_rtx_REG (CCGZmode, FLAGS_REG);
+ tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
ix86_expand_branch (code, tmp, const0_rtx, label);
return;
Jakub