On Sat, Jan 15, 2022 at 09:29:05AM +0100, Uros Bizjak wrote:
> > --- gcc/config/i386/i386.md.jj 2022-01-14 11:51:34.432384170 +0100
> > +++ gcc/config/i386/i386.md 2022-01-14 18:22:41.140906449 +0100
> > @@ -23886,6 +23886,18 @@ (define_insn "hreset"
> > [(set_attr "type" "other")
> > (set_attr "length" "4")])
> >
> > +;; Spaceship optimization
> > +(define_expand "spaceship<mode>3"
> > + [(match_operand:SI 0 "register_operand")
> > + (match_operand:MODEF 1 "cmp_fp_expander_operand")
> > + (match_operand:MODEF 2 "cmp_fp_expander_operand")]
> > + "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
> > + && TARGET_CMOVE && TARGET_IEEE_FP"
>
> Is there a reason that this pattern is limited to TARGET_IEEE_FP?
> During the expansion in ix86_expand_fp_spaceship, we can just skip
> jump on unordered, while ix86_expand_fp_compare will emit the correct
> comparison mode depending on TARGET_IEEE_FP.
For -ffast-math I thought <=> expands to just x == y ? 0 : x < y ? -1 : 1
but apparently not, it is still x == y ? 0 : x < y ? -1 : x > y ? 1 : 2
but it is still optimized much better than the non-fast-math case
without the patch:
comisd %xmm1, %xmm0
je .L12
jb .L13
movl $1, %edx
movl $2, %eax
cmova %edx, %eax
ret
.p2align 4,,10
.p2align 3
.L12:
xorl %eax, %eax
ret
.p2align 4,,10
.p2align 3
.L13:
movl $-1, %eax
ret
so just one comparison but admittedly the
movl $1, %edx
movl $2, %eax
cmova %edx, %eax
part is unnecessary.
So below is an incremental patch that handles even the !HONOR_NANS
case at the gimple pattern matching (while for HONOR_NANS we must
obey that for NaN operand(s) >/>=/</<= comparisons are false and so need to
make sure we look for the third comparison on the false edge of the second
one, for !HONOR_NANS that is not the case).  With the incremental patch we get:
comisd %xmm1, %xmm0
je .L2
seta %al
leal -1(%rax,%rax), %eax
ret
.p2align 4,,10
.p2align 3
.L2:
xorl %eax, %eax
ret
for -O2 -ffast-math.
Also, I've added || (TARGET_SAHF && TARGET_USE_SAHF), because apparently
we can handle that case nicely too, it is just the IX86_FPCMP_ARITH
case where fp_compare already emits very specific code (and we can't
call ix86_expand_fp_compare 3 times because that would for IEEE_FP
emit different comparisons which couldn't be CSEd).
I'll add also -ffast-math testsuite coverage and retest.
Also, I wonder if I shouldn't handle XFmode the same, thoughts on that?
> > + gcc_checking_assert (ix86_fp_comparison_strategy (GT) ==
> > IX86_FPCMP_COMI);
> > + rtx gt = ix86_expand_fp_compare (GT, op0, op1);
> > + rtx l0 = gen_label_rtx ();
> > + rtx l1 = gen_label_rtx ();
> > + rtx l2 = gen_label_rtx ();
> > + rtx lend = gen_label_rtx ();
> > + rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
> > + gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> > + rtx tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
> > + gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx);
> > + rtx_insn *jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > + add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ());
>
> Please also add JUMP_LABEL to the insn.
>
> > + rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
> > + gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> > + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
> > + gen_rtx_LABEL_REF (VOIDmode, l0), pc_rtx);
> > + jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > + add_reg_br_prob_note (jmp, profile_probability::unlikely ());
> > + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, gt,
> > + gen_rtx_LABEL_REF (VOIDmode, l1), pc_rtx);
> > + jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > + add_reg_br_prob_note (jmp, profile_probability::even ());
> > + emit_move_insn (dest, constm1_rtx);
> > + emit_jump (lend);
> > + emit_label (l0);
>
> and LABEL_NUSES label.
Why? That seems to be a waste of time to me, unless something uses them
already during expansion. Because pass_expand::execute
runs:
/* We need JUMP_LABEL be set in order to redirect jumps, and hence
split edges which edge insertions might do. */
rebuild_jump_labels (get_insns ());
which resets all LABEL_NUSES to 0 (well, to:
if (LABEL_P (insn))
LABEL_NUSES (insn) = (LABEL_PRESERVE_P (insn) != 0);
and then recomputes them and adds JUMP_LABEL if needed:
JUMP_LABEL (insn) = label;
--- gcc/config/i386/i386.md.jj 2022-01-15 09:51:25.404468342 +0100
+++ gcc/config/i386/i386.md 2022-01-15 09:56:31.602109421 +0100
@@ -23892,7 +23892,7 @@ (define_expand "spaceship<mode>3"
(match_operand:MODEF 1 "cmp_fp_expander_operand")
(match_operand:MODEF 2 "cmp_fp_expander_operand")]
"(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
- && TARGET_CMOVE && TARGET_IEEE_FP"
+ && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
{
ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
DONE;
--- gcc/config/i386/i386-expand.c.jj 2022-01-15 09:51:25.411468242 +0100
+++ gcc/config/i386/i386-expand.c 2022-01-15 10:38:26.924333651 +0100
@@ -2884,18 +2884,23 @@ ix86_expand_setcc (rtx dest, enum rtx_co
void
ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1)
{
- gcc_checking_assert (ix86_fp_comparison_strategy (GT) == IX86_FPCMP_COMI);
+ gcc_checking_assert (ix86_fp_comparison_strategy (GT) != IX86_FPCMP_ARITH);
rtx gt = ix86_expand_fp_compare (GT, op0, op1);
rtx l0 = gen_label_rtx ();
rtx l1 = gen_label_rtx ();
- rtx l2 = gen_label_rtx ();
+ rtx l2 = TARGET_IEEE_FP ? gen_label_rtx () : NULL_RTX;
rtx lend = gen_label_rtx ();
- rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
- gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
- rtx tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
+ rtx tmp;
+ rtx_insn *jmp;
+ if (l2)
+ {
+ rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
+ gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx);
- rtx_insn *jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
- add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ());
+ jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+ add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ());
+ }
rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
@@ -2914,8 +2919,11 @@ ix86_expand_fp_spaceship (rtx dest, rtx
emit_label (l1);
emit_move_insn (dest, const1_rtx);
emit_jump (lend);
- emit_label (l2);
- emit_move_insn (dest, const2_rtx);
+ if (l2)
+ {
+ emit_label (l2);
+ emit_move_insn (dest, const2_rtx);
+ }
emit_label (lend);
}
--- gcc/tree-ssa-math-opts.c.jj 2022-01-15 09:51:25.402468370 +0100
+++ gcc/tree-ssa-math-opts.c 2022-01-15 10:35:52.366533951 +0100
@@ -4694,7 +4694,6 @@ optimize_spaceship (gimple *stmt)
tree arg1 = gimple_cond_lhs (stmt);
tree arg2 = gimple_cond_rhs (stmt);
if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
- || !HONOR_NANS (TREE_TYPE (arg1))
|| optab_handler (spaceship_optab,
TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
|| operand_equal_p (arg1, arg2, 0))
@@ -4732,56 +4731,67 @@ optimize_spaceship (gimple *stmt)
return;
}
- /* With NaNs, </<=/>/>= are false, so we need to look for the
- third comparison on the false edge from whatever non-equality
- comparison the second comparison is. */
- int i = (EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0;
- bb2 = EDGE_SUCC (bb1, i)->dest;
- g = last_stmt (bb2);
- if (g == NULL
- || gimple_code (g) != GIMPLE_COND
- || !single_pred_p (bb2)
- || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
- ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
- : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
- || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
- || !cond_only_block_p (bb2)
- || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
- return;
-
- enum tree_code ccode2
- = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
- switch (gimple_cond_code (g))
+ for (int i = 0; i < 2; ++i)
{
- case LT_EXPR:
- case LE_EXPR:
+ /* With NaNs, </<=/>/>= are false, so we need to look for the
+ third comparison on the false edge from whatever non-equality
+ comparison the second comparison is. */
+ if (HONOR_NANS (TREE_TYPE (arg1))
+ && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
+ continue;
+
+ bb2 = EDGE_SUCC (bb1, i)->dest;
+ g = last_stmt (bb2);
+ if (g == NULL
+ || gimple_code (g) != GIMPLE_COND
+ || !single_pred_p (bb2)
+ || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
+ ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
+ : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
+ || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
+ || !cond_only_block_p (bb2)
+ || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
+ continue;
+
+ enum tree_code ccode2
+ = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
+ switch (gimple_cond_code (g))
+ {
+ case LT_EXPR:
+ case LE_EXPR:
+ break;
+ case GT_EXPR:
+ case GE_EXPR:
+ ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
+ break;
+ default:
+ continue;
+ }
+ if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
+ return;
+
+ if ((ccode == LT_EXPR)
+ ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
+ {
+ em1 = EDGE_SUCC (bb1, 1 - i);
+ e1 = EDGE_SUCC (bb2, 0);
+ e2 = EDGE_SUCC (bb2, 1);
+ if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
+ std::swap (e1, e2);
+ }
+ else
+ {
+ e1 = EDGE_SUCC (bb1, 1 - i);
+ em1 = EDGE_SUCC (bb2, 0);
+ e2 = EDGE_SUCC (bb2, 1);
+ if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
+ std::swap (em1, e2);
+ }
break;
- case GT_EXPR:
- case GE_EXPR:
- ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
- break;
- default:
- return;
}
- if (ccode == ccode2)
- return;
- if (ccode == LT_EXPR)
- {
- em1 = EDGE_SUCC (bb1, 1 - i);
- e1 = EDGE_SUCC (bb2, 0);
- e2 = EDGE_SUCC (bb2, 1);
- if ((e1->flags & EDGE_TRUE_VALUE) == 0)
- std::swap (e1, e2);
- }
- else
- {
- e1 = EDGE_SUCC (bb1, 1 - i);
- em1 = EDGE_SUCC (bb2, 0);
- e2 = EDGE_SUCC (bb2, 1);
- if ((em1->flags & EDGE_TRUE_VALUE) == 0)
- std::swap (em1, e2);
- }
+ if (em1 == NULL)
+ return;
g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
tree lhs = make_ssa_name (integer_type_node);
@@ -4796,14 +4806,19 @@ optimize_spaceship (gimple *stmt)
g = last_stmt (bb1);
cond = as_a <gcond *> (g);
- gimple_cond_set_code (cond, EQ_EXPR);
gimple_cond_set_lhs (cond, lhs);
if (em1->src == bb1)
- gimple_cond_set_rhs (cond, integer_minus_one_node);
+ {
+ gimple_cond_set_rhs (cond, integer_minus_one_node);
+ gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
+ ? EQ_EXPR : NE_EXPR);
+ }
else
{
gcc_assert (e1->src == bb1);
gimple_cond_set_rhs (cond, integer_one_node);
+ gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
+ ? EQ_EXPR : NE_EXPR);
}
update_stmt (g);
Jakub