Hi!

As the enhanced testcase shows, ix86_expand_floorceildf_32 doesn't emit
correct ceil when honoring signed zeros, because it performs copysign,
then comparison and then based on the comparison an optional addition of 1.
The comment claims that it is essential to use x2 -= -1.0 instead of
x2 += 1.0, but in reality we emit x2 += 1.0 anyway and it makes no
difference on the sign of result if it is zero, as (unless rounding to
negative infinity) both -1.0 - (-1.0) and -1.0 + 1.0 evaluate to +0.0
rather than -0.0 we need in this case.

I have no better ideas than to use another copysign, but it should be just
one extra instruction, as we've already previously done copysign from the
same source and thus hve already computed the x & signmask and so this patch
just adds a {,v}por of that previously computed x & signmask into the result
again.  In this case we aren't really copying it always to positive, but
all we want is handle the single problematic case when we compute positive 0
and need negative 0, for all others the oring won't really change the sign.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Or does anybody have some smarter idea?

Note, not sure if floor isn't incorrect in the round to negative infinity
case (but that would be preexisting issue).

2019-03-15  Jakub Jelinek  <ja...@redhat.com>

        PR target/89726
        * config/i386/i386.c (ix86_expand_floorceildf_32): In ceil
        compensation use x2 += 1 instead of x2 -= -1 and when honoring
        signed zeros, do another copysign after the compensation.

        * gcc.target/i386/fpprec-1.c (x): Add 6 new constants.
        (expect_round, expect_rint, expect_floor, expect_ceil, expect_trunc):
        Add expected results for them.

--- gcc/config/i386/i386.c.jj   2019-03-14 23:44:27.862560139 +0100
+++ gcc/config/i386/i386.c      2019-03-15 15:07:54.607453430 +0100
@@ -45563,8 +45563,10 @@ ix86_expand_floorceildf_32 (rtx operand0
           x2 -= 1;
      Compensate.  Ceil:
         if (x2 < x)
-          x2 -= -1;
-        return x2;
+          x2 += 1;
+       if (HONOR_SIGNED_ZEROS (mode))
+         x2 = copysign (x2, x);
+       return x2;
    */
   machine_mode mode = GET_MODE (operand0);
   rtx xa, TWO52, tmp, one, res, mask;
@@ -45590,17 +45592,16 @@ ix86_expand_floorceildf_32 (rtx operand0
   /* xa = copysign (xa, operand1) */
   ix86_sse_copysign_to_positive (xa, xa, res, mask);
 
-  /* generate 1.0 or -1.0 */
-  one = force_reg (mode,
-                  const_double_from_real_value (do_floor
-                                                ? dconst1 : dconstm1, mode));
+  /* generate 1.0 */
+  one = force_reg (mode, const_double_from_real_value (dconst1, mode));
 
   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
   emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
-  /* We always need to subtract here to preserve signed zero.  */
-  tmp = expand_simple_binop (mode, MINUS,
+  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+  if (!do_floor && HONOR_SIGNED_ZEROS (mode))
+    ix86_sse_copysign_to_positive (tmp, tmp, res, mask);
   emit_move_insn (res, tmp);
 
   emit_label (label);
--- gcc/testsuite/gcc.target/i386/fpprec-1.c.jj 2010-05-21 11:46:22.000000000 
+0200
+++ gcc/testsuite/gcc.target/i386/fpprec-1.c    2019-03-15 15:01:51.430345113 
+0100
@@ -11,6 +11,9 @@ double x[] = { __builtin_nan(""), __buil
        0x1.0000000000001p-1, 0x1.fffffffffffffp-2,
        0x1.0000000000001p+0, 0x1.fffffffffffffp-1,
        0x1.8000000000001p+0, 0x1.7ffffffffffffp+0,
+       -0x1.0000000000001p-1, -0x1.fffffffffffffp-2,
+       -0x1.0000000000001p+0, -0x1.fffffffffffffp-1,
+       -0x1.8000000000001p+0, -0x1.7ffffffffffffp+0,
        -0.0, 0.0, -0.5, 0.5, -1.0, 1.0, -1.5, 1.5, -2.0, 2.0,
        -2.5, 2.5 };
 #define NUM (sizeof(x)/sizeof(double))
@@ -19,6 +22,7 @@ double expect_round[] = { __builtin_nan(
        -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
        -0.0, 0.0,
        1.0, 0.0, 1.0, 1.0, 2.0, 1.0,
+       -1.0, -0.0, -1.0, -1.0, -2.0, -1.0,
        -0.0, 0.0, -1.0, 1.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0,
        -3.0, 3.0 };
 
@@ -26,6 +30,7 @@ double expect_rint[] = { __builtin_nan("
         -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
         -0.0, 0.0,
         1.0, 0.0, 1.0, 1.0, 2.0, 1.0,
+        -1.0, -0.0, -1.0, -1.0, -2.0, -1.0,
         -0.0, 0.0, -0.0, 0.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0,
         -2.0, 2.0 };
 
@@ -33,6 +38,7 @@ double expect_floor[] = { __builtin_nan(
         -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
         -1.0, 0.0,
         0.0, 0.0, 1.0, 0.0, 1.0, 1.0,
+        -1.0, -1.0, -2.0, -1.0, -2.0, -2.0,
         -0.0, 0.0, -1.0, 0.0, -1.0, 1.0, -2.0, 1.0, -2.0, 2.0,
         -3.0, 2.0 };
 
@@ -40,6 +46,7 @@ double expect_ceil[] = { __builtin_nan("
         -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
         -0.0, 1.0,
         1.0, 1.0, 2.0, 1.0, 2.0, 2.0,
+        -0.0, -0.0, -1.0, -0.0, -1.0, -1.0,
         -0.0, 0.0, -0.0, 1.0, -1.0, 1.0, -1.0, 2.0, -2.0, 2.0,
         -2.0, 3.0 };
 
@@ -47,6 +54,7 @@ double expect_trunc[] = { __builtin_nan(
         -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
         -0.0, 0.0,
         0.0, 0.0, 1.0, 0.0, 1.0, 1.0,
+        -0.0, -0.0, -1.0, -0.0, -1.0, -1.0,
         -0.0, 0.0, -0.0, 0.0, -1.0, 1.0, -1.0, 1.0, -2.0, 2.0,
         -2.0, 2.0 };
 

        Jakub

Reply via email to