On Sat, Jan 15, 2022 at 09:29:05AM +0100, Uros Bizjak wrote:
> > --- gcc/config/i386/i386.md.jj  2022-01-14 11:51:34.432384170 +0100
> > +++ gcc/config/i386/i386.md     2022-01-14 18:22:41.140906449 +0100
> > @@ -23886,6 +23886,18 @@ (define_insn "hreset"
> >    [(set_attr "type" "other")
> >     (set_attr "length" "4")])
> >
> > +;; Spaceship optimization
> > +(define_expand "spaceship<mode>3"
> > +  [(match_operand:SI 0 "register_operand")
> > +   (match_operand:MODEF 1 "cmp_fp_expander_operand")
> > +   (match_operand:MODEF 2 "cmp_fp_expander_operand")]
> > +  "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
> > +   && TARGET_CMOVE && TARGET_IEEE_FP"
> 
> Is there a reason that this pattern is limited to TARGET_IEEE_FP?
> During the expansion in ix86_expand_fp_spaceship, we can just skip
> jump on unordered, while ix86_expand_fp_compare will emit the correct
> comparison mode depending on TARGET_IEEE_FP.

For -ffast-math I thought <=> expands to just x == y ? 0 : x < y ? -1 : 1
but apparently not, it is still x == y ? 0 : x < y ? -1 : x > y ? 1 : 2
but it is still optimized much better than the non-fast-math case
without the patch:
        comisd  %xmm1, %xmm0
        je      .L12
        jb      .L13
        movl    $1, %edx
        movl    $2, %eax
        cmova   %edx, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L12:
        xorl    %eax, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L13:
        movl    $-1, %eax
        ret
so just one comparison but admittedly the
        movl    $1, %edx
        movl    $2, %eax
        cmova   %edx, %eax
part is unnecessary.
So below is an incremental patch that handles even the !HONORS_NANS
case at the gimple pattern matching (while for HONOR_NANS we must
obey that for NaN operand(s) >/>=/</<= is false and so need to make sure
we look for the third comparison on the false edge of the second one,
for !HONOR_NANS that is not the case.  With the incremental patch we get:
        comisd  %xmm1, %xmm0
        je      .L2
        seta    %al
        leal    -1(%rax,%rax), %eax
        ret
        .p2align 4,,10
        .p2align 3
.L2:
        xorl    %eax, %eax
        ret
for -O2 -ffast-math.
Also, I've added || (TARGET_SAHF && TARGET_USE_SAHF), because apparently
we can handle that case nicely too, it is just the IX86_FPCMP_ARITH
case where fp_compare already emits very specific code (and we can't
call ix86_expand_fp_compare 3 times because that would for IEEE_FP
emit different comparisons which couldn't be CSEd).
I'll add also -ffast-math testsuite coverage and retest.

Also, I wonder if I shouldn't handle XFmode the same, thoughts on that?

> > +  gcc_checking_assert (ix86_fp_comparison_strategy (GT) == 
> > IX86_FPCMP_COMI);
> > +  rtx gt = ix86_expand_fp_compare (GT, op0, op1);
> > +  rtx l0 = gen_label_rtx ();
> > +  rtx l1 = gen_label_rtx ();
> > +  rtx l2 = gen_label_rtx ();
> > +  rtx lend = gen_label_rtx ();
> > +  rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
> > +                          gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> > +  rtx tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
> > +                                 gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx);
> > +  rtx_insn *jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > +  add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ());
> 
> Please also add JUMP_LABEL to the insn.
> 
> > +  rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
> > +                          gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> > +  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
> > +                             gen_rtx_LABEL_REF (VOIDmode, l0), pc_rtx);
> > +  jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > +  add_reg_br_prob_note (jmp, profile_probability::unlikely ());
> > +  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, gt,
> > +                             gen_rtx_LABEL_REF (VOIDmode, l1), pc_rtx);
> > +  jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > +  add_reg_br_prob_note (jmp, profile_probability::even ());
> > +  emit_move_insn (dest, constm1_rtx);
> > +  emit_jump (lend);
> > +  emit_label (l0);
> 
> and LABEL_NUSES label.

Why?  That seems to be a waste of time to me, unless something uses them
already during expansion.  Because pass_expand::execute
runs:
  /* We need JUMP_LABEL be set in order to redirect jumps, and hence
     split edges which edge insertions might do.  */
  rebuild_jump_labels (get_insns ());
which resets all LABEL_NUSES to 0 (well, to:
      if (LABEL_P (insn))
        LABEL_NUSES (insn) = (LABEL_PRESERVE_P (insn) != 0);
and then recomputes them and adds JUMP_LABEL if needed:
              JUMP_LABEL (insn) = label;

--- gcc/config/i386/i386.md.jj  2022-01-15 09:51:25.404468342 +0100
+++ gcc/config/i386/i386.md     2022-01-15 09:56:31.602109421 +0100
@@ -23892,7 +23892,7 @@ (define_expand "spaceship<mode>3"
    (match_operand:MODEF 1 "cmp_fp_expander_operand")
    (match_operand:MODEF 2 "cmp_fp_expander_operand")]
   "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
-   && TARGET_CMOVE && TARGET_IEEE_FP"
+   && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
 {
   ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
   DONE;
--- gcc/config/i386/i386-expand.c.jj    2022-01-15 09:51:25.411468242 +0100
+++ gcc/config/i386/i386-expand.c       2022-01-15 10:38:26.924333651 +0100
@@ -2884,18 +2884,23 @@ ix86_expand_setcc (rtx dest, enum rtx_co
 void
 ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1)
 {
-  gcc_checking_assert (ix86_fp_comparison_strategy (GT) == IX86_FPCMP_COMI);
+  gcc_checking_assert (ix86_fp_comparison_strategy (GT) != IX86_FPCMP_ARITH);
   rtx gt = ix86_expand_fp_compare (GT, op0, op1);
   rtx l0 = gen_label_rtx ();
   rtx l1 = gen_label_rtx ();
-  rtx l2 = gen_label_rtx ();
+  rtx l2 = TARGET_IEEE_FP ? gen_label_rtx () : NULL_RTX;
   rtx lend = gen_label_rtx ();
-  rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
-                          gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
-  rtx tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
+  rtx tmp;
+  rtx_insn *jmp;
+  if (l2)
+    {
+      rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
+                              gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
+      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
                                  gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx);
-  rtx_insn *jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
-  add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ());
+      jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+      add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ());
+    }
   rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
                           gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
@@ -2914,8 +2919,11 @@ ix86_expand_fp_spaceship (rtx dest, rtx
   emit_label (l1);
   emit_move_insn (dest, const1_rtx);
   emit_jump (lend);
-  emit_label (l2);
-  emit_move_insn (dest, const2_rtx);
+  if (l2)
+    {
+      emit_label (l2);
+      emit_move_insn (dest, const2_rtx);
+    }
   emit_label (lend);
 }
 
--- gcc/tree-ssa-math-opts.c.jj 2022-01-15 09:51:25.402468370 +0100
+++ gcc/tree-ssa-math-opts.c    2022-01-15 10:35:52.366533951 +0100
@@ -4694,7 +4694,6 @@ optimize_spaceship (gimple *stmt)
   tree arg1 = gimple_cond_lhs (stmt);
   tree arg2 = gimple_cond_rhs (stmt);
   if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
-      || !HONOR_NANS (TREE_TYPE (arg1))
       || optab_handler (spaceship_optab,
                        TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
       || operand_equal_p (arg1, arg2, 0))
@@ -4732,56 +4731,67 @@ optimize_spaceship (gimple *stmt)
       return;
     }
 
-  /* With NaNs, </<=/>/>= are false, so we need to look for the
-     third comparison on the false edge from whatever non-equality
-     comparison the second comparison is.  */
-  int i = (EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0;
-  bb2 = EDGE_SUCC (bb1, i)->dest;
-  g = last_stmt (bb2);
-  if (g == NULL
-      || gimple_code (g) != GIMPLE_COND
-      || !single_pred_p (bb2)
-      || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
-         ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
-         : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
-            || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
-      || !cond_only_block_p (bb2)
-      || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
-    return;
-
-  enum tree_code ccode2
-    = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
-  switch (gimple_cond_code (g))
+  for (int i = 0; i < 2; ++i)
     {
-    case LT_EXPR:
-    case LE_EXPR:
+      /* With NaNs, </<=/>/>= are false, so we need to look for the
+        third comparison on the false edge from whatever non-equality
+        comparison the second comparison is.  */
+      if (HONOR_NANS (TREE_TYPE (arg1))
+         && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
+       continue;
+
+      bb2 = EDGE_SUCC (bb1, i)->dest;
+      g = last_stmt (bb2);
+      if (g == NULL
+         || gimple_code (g) != GIMPLE_COND
+         || !single_pred_p (bb2)
+         || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
+             ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
+             : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
+                || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
+         || !cond_only_block_p (bb2)
+         || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
+       continue;
+
+      enum tree_code ccode2
+       = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
+      switch (gimple_cond_code (g))
+       {
+       case LT_EXPR:
+       case LE_EXPR:
+         break;
+       case GT_EXPR:
+       case GE_EXPR:
+         ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
+         break;
+       default:
+         continue;
+       }
+      if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
+       return;
+
+      if ((ccode == LT_EXPR)
+         ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
+       {
+         em1 = EDGE_SUCC (bb1, 1 - i);
+         e1 = EDGE_SUCC (bb2, 0);
+         e2 = EDGE_SUCC (bb2, 1);
+         if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
+           std::swap (e1, e2);
+       }
+      else
+       {
+         e1 = EDGE_SUCC (bb1, 1 - i);
+         em1 = EDGE_SUCC (bb2, 0);
+         e2 = EDGE_SUCC (bb2, 1);
+         if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
+           std::swap (em1, e2);
+       }
       break;
-    case GT_EXPR:
-    case GE_EXPR:
-      ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
-      break;
-    default:
-      return;
     }
-  if (ccode == ccode2)
-    return;
 
-  if (ccode == LT_EXPR)
-    {
-      em1 = EDGE_SUCC (bb1, 1 - i);
-      e1 = EDGE_SUCC (bb2, 0);
-      e2 = EDGE_SUCC (bb2, 1);
-      if ((e1->flags & EDGE_TRUE_VALUE) == 0)
-       std::swap (e1, e2);
-    }
-  else
-    {
-      e1 = EDGE_SUCC (bb1, 1 - i);
-      em1 = EDGE_SUCC (bb2, 0);
-      e2 = EDGE_SUCC (bb2, 1);
-      if ((em1->flags & EDGE_TRUE_VALUE) == 0)
-       std::swap (em1, e2);
-    }
+  if (em1 == NULL)
+    return;
 
   g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
   tree lhs = make_ssa_name (integer_type_node);
@@ -4796,14 +4806,19 @@ optimize_spaceship (gimple *stmt)
 
   g = last_stmt (bb1);
   cond = as_a <gcond *> (g);
-  gimple_cond_set_code (cond, EQ_EXPR);
   gimple_cond_set_lhs (cond, lhs);
   if (em1->src == bb1)
-    gimple_cond_set_rhs (cond, integer_minus_one_node);
+    {
+      gimple_cond_set_rhs (cond, integer_minus_one_node);
+      gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
+                                 ? EQ_EXPR : NE_EXPR);
+    }
   else
     {
       gcc_assert (e1->src == bb1);
       gimple_cond_set_rhs (cond, integer_one_node);
+      gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
+                                 ? EQ_EXPR : NE_EXPR);
     }
   update_stmt (g);
 


        Jakub

Reply via email to