On Sat, Jan 15, 2022 at 09:29:05AM +0100, Uros Bizjak wrote: > > --- gcc/config/i386/i386.md.jj 2022-01-14 11:51:34.432384170 +0100 > > +++ gcc/config/i386/i386.md 2022-01-14 18:22:41.140906449 +0100 > > @@ -23886,6 +23886,18 @@ (define_insn "hreset" > > [(set_attr "type" "other") > > (set_attr "length" "4")]) > > > > +;; Spaceship optimization > > +(define_expand "spaceship<mode>3" > > + [(match_operand:SI 0 "register_operand") > > + (match_operand:MODEF 1 "cmp_fp_expander_operand") > > + (match_operand:MODEF 2 "cmp_fp_expander_operand")] > > + "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)) > > + && TARGET_CMOVE && TARGET_IEEE_FP" > > Is there a reason that this pattern is limited to TARGET_IEEE_FP? > During the expansion in ix86_expand_fp_spaceship, we can just skip > jump on unordered, while ix86_expand_fp_compare will emit the correct > comparison mode depending on TARGET_IEEE_FP.
For -ffast-math I thought <=> expands to just x == y ? 0 : x < y ? -1 : 1 but apparently not, it is still x == y ? 0 : x < y ? -1 : x > y ? 1 : 2 but it is still optimized much better than the non-fast-math case without the patch: comisd %xmm1, %xmm0 je .L12 jb .L13 movl $1, %edx movl $2, %eax cmova %edx, %eax ret .p2align 4,,10 .p2align 3 .L12: xorl %eax, %eax ret .p2align 4,,10 .p2align 3 .L13: movl $-1, %eax ret so just one comparison but admittedly the movl $1, %edx movl $2, %eax cmova %edx, %eax part is unnecessary. So below is an incremental patch that handles even the !HONOR_NANS case at the gimple pattern matching (while for HONOR_NANS we must obey that for NaN operand(s) >/>=/</<= is false and so need to make sure we look for the third comparison on the false edge of the second one, for !HONOR_NANS that is not the case). With the incremental patch we get: comisd %xmm1, %xmm0 je .L2 seta %al leal -1(%rax,%rax), %eax ret .p2align 4,,10 .p2align 3 .L2: xorl %eax, %eax ret for -O2 -ffast-math. Also, I've added || (TARGET_SAHF && TARGET_USE_SAHF), because apparently we can handle that case nicely too, it is just the IX86_FPCMP_ARITH case where fp_compare already emits very specific code (and we can't call ix86_expand_fp_compare 3 times because that would for IEEE_FP emit different comparisons which couldn't be CSEd). I'll add also -ffast-math testsuite coverage and retest. Also, I wonder if I shouldn't handle XFmode the same, thoughts on that? 
> > + gcc_checking_assert (ix86_fp_comparison_strategy (GT) == > > IX86_FPCMP_COMI); > > + rtx gt = ix86_expand_fp_compare (GT, op0, op1); > > + rtx l0 = gen_label_rtx (); > > + rtx l1 = gen_label_rtx (); > > + rtx l2 = gen_label_rtx (); > > + rtx lend = gen_label_rtx (); > > + rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode, > > + gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx); > > + rtx tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un, > > + gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx); > > + rtx_insn *jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); > > + add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ()); > > Please also add JUMP_LABEL to the insn. > > > + rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode, > > + gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx); > > + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq, > > + gen_rtx_LABEL_REF (VOIDmode, l0), pc_rtx); > > + jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); > > + add_reg_br_prob_note (jmp, profile_probability::unlikely ()); > > + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, gt, > > + gen_rtx_LABEL_REF (VOIDmode, l1), pc_rtx); > > + jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); > > + add_reg_br_prob_note (jmp, profile_probability::even ()); > > + emit_move_insn (dest, constm1_rtx); > > + emit_jump (lend); > > + emit_label (l0); > > and LABEL_NUSES label. Why? That seems to be a waste of time to me, unless something uses them already during expansion. Because pass_expand::execute runs: /* We need JUMP_LABEL be set in order to redirect jumps, and hence split edges which edge insertions might do. 
*/ rebuild_jump_labels (get_insns ()); which resets all LABEL_NUSES to 0 (well, to: if (LABEL_P (insn)) LABEL_NUSES (insn) = (LABEL_PRESERVE_P (insn) != 0); and then recomputes them and adds JUMP_LABEL if needed: JUMP_LABEL (insn) = label; --- gcc/config/i386/i386.md.jj 2022-01-15 09:51:25.404468342 +0100 +++ gcc/config/i386/i386.md 2022-01-15 09:56:31.602109421 +0100 @@ -23892,7 +23892,7 @@ (define_expand "spaceship<mode>3" (match_operand:MODEF 1 "cmp_fp_expander_operand") (match_operand:MODEF 2 "cmp_fp_expander_operand")] "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)) - && TARGET_CMOVE && TARGET_IEEE_FP" + && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))" { ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]); DONE; --- gcc/config/i386/i386-expand.c.jj 2022-01-15 09:51:25.411468242 +0100 +++ gcc/config/i386/i386-expand.c 2022-01-15 10:38:26.924333651 +0100 @@ -2884,18 +2884,23 @@ ix86_expand_setcc (rtx dest, enum rtx_co void ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1) { - gcc_checking_assert (ix86_fp_comparison_strategy (GT) == IX86_FPCMP_COMI); + gcc_checking_assert (ix86_fp_comparison_strategy (GT) != IX86_FPCMP_ARITH); rtx gt = ix86_expand_fp_compare (GT, op0, op1); rtx l0 = gen_label_rtx (); rtx l1 = gen_label_rtx (); - rtx l2 = gen_label_rtx (); + rtx l2 = TARGET_IEEE_FP ? 
gen_label_rtx () : NULL_RTX; rtx lend = gen_label_rtx (); - rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode, - gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx); - rtx tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un, + rtx tmp; + rtx_insn *jmp; + if (l2) + { + rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode, + gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un, gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx); - rtx_insn *jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); - add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ()); + jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ()); + } rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx); tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq, @@ -2914,8 +2919,11 @@ ix86_expand_fp_spaceship (rtx dest, rtx emit_label (l1); emit_move_insn (dest, const1_rtx); emit_jump (lend); - emit_label (l2); - emit_move_insn (dest, const2_rtx); + if (l2) + { + emit_label (l2); + emit_move_insn (dest, const2_rtx); + } emit_label (lend); } --- gcc/tree-ssa-math-opts.c.jj 2022-01-15 09:51:25.402468370 +0100 +++ gcc/tree-ssa-math-opts.c 2022-01-15 10:35:52.366533951 +0100 @@ -4694,7 +4694,6 @@ optimize_spaceship (gimple *stmt) tree arg1 = gimple_cond_lhs (stmt); tree arg2 = gimple_cond_rhs (stmt); if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) - || !HONOR_NANS (TREE_TYPE (arg1)) || optab_handler (spaceship_optab, TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing || operand_equal_p (arg1, arg2, 0)) @@ -4732,56 +4731,67 @@ optimize_spaceship (gimple *stmt) return; } - /* With NaNs, </<=/>/>= are false, so we need to look for the - third comparison on the false edge from whatever non-equality - comparison the second comparison is. 
*/ - int i = (EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0; - bb2 = EDGE_SUCC (bb1, i)->dest; - g = last_stmt (bb2); - if (g == NULL - || gimple_code (g) != GIMPLE_COND - || !single_pred_p (bb2) - || (operand_equal_p (gimple_cond_lhs (g), arg1, 0) - ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0) - : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0) - || !operand_equal_p (gimple_cond_rhs (g), arg1, 0))) - || !cond_only_block_p (bb2) - || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest) - return; - - enum tree_code ccode2 - = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR); - switch (gimple_cond_code (g)) + for (int i = 0; i < 2; ++i) { - case LT_EXPR: - case LE_EXPR: + /* With NaNs, </<=/>/>= are false, so we need to look for the + third comparison on the false edge from whatever non-equality + comparison the second comparison is. */ + if (HONOR_NANS (TREE_TYPE (arg1)) + && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0) + continue; + + bb2 = EDGE_SUCC (bb1, i)->dest; + g = last_stmt (bb2); + if (g == NULL + || gimple_code (g) != GIMPLE_COND + || !single_pred_p (bb2) + || (operand_equal_p (gimple_cond_lhs (g), arg1, 0) + ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0) + : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0) + || !operand_equal_p (gimple_cond_rhs (g), arg1, 0))) + || !cond_only_block_p (bb2) + || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest) + continue; + + enum tree_code ccode2 + = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR); + switch (gimple_cond_code (g)) + { + case LT_EXPR: + case LE_EXPR: + break; + case GT_EXPR: + case GE_EXPR: + ccode2 = ccode2 == LT_EXPR ? 
GT_EXPR : LT_EXPR; + break; + default: + continue; + } + if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2) + return; + + if ((ccode == LT_EXPR) + ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)) + { + em1 = EDGE_SUCC (bb1, 1 - i); + e1 = EDGE_SUCC (bb2, 0); + e2 = EDGE_SUCC (bb2, 1); + if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0)) + std::swap (e1, e2); + } + else + { + e1 = EDGE_SUCC (bb1, 1 - i); + em1 = EDGE_SUCC (bb2, 0); + e2 = EDGE_SUCC (bb2, 1); + if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0)) + std::swap (em1, e2); + } break; - case GT_EXPR: - case GE_EXPR: - ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR; - break; - default: - return; } - if (ccode == ccode2) - return; - if (ccode == LT_EXPR) - { - em1 = EDGE_SUCC (bb1, 1 - i); - e1 = EDGE_SUCC (bb2, 0); - e2 = EDGE_SUCC (bb2, 1); - if ((e1->flags & EDGE_TRUE_VALUE) == 0) - std::swap (e1, e2); - } - else - { - e1 = EDGE_SUCC (bb1, 1 - i); - em1 = EDGE_SUCC (bb2, 0); - e2 = EDGE_SUCC (bb2, 1); - if ((em1->flags & EDGE_TRUE_VALUE) == 0) - std::swap (em1, e2); - } + if (em1 == NULL) + return; g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2); tree lhs = make_ssa_name (integer_type_node); @@ -4796,14 +4806,19 @@ optimize_spaceship (gimple *stmt) g = last_stmt (bb1); cond = as_a <gcond *> (g); - gimple_cond_set_code (cond, EQ_EXPR); gimple_cond_set_lhs (cond, lhs); if (em1->src == bb1) - gimple_cond_set_rhs (cond, integer_minus_one_node); + { + gimple_cond_set_rhs (cond, integer_minus_one_node); + gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE) + ? EQ_EXPR : NE_EXPR); + } else { gcc_assert (e1->src == bb1); gimple_cond_set_rhs (cond, integer_one_node); + gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE) + ? EQ_EXPR : NE_EXPR); } update_stmt (g); Jakub