[PATCH] Fix i?86 -mfpmath=sse ceil inline expansion (PR target/89726)

2019-03-15 Thread Jakub Jelinek
Hi!

As the enhanced testcase shows, ix86_expand_floorceildf_32 doesn't emit
correct ceil when honoring signed zeros, because it performs copysign,
then comparison and then based on the comparison an optional addition of 1.
The comment claims that it is essential to use x2 -= -1.0 instead of
x2 += 1.0, but in reality we emit x2 += 1.0 anyway and it makes no
difference on the sign of result if it is zero, as (unless rounding to
negative infinity) both -1.0 - (-1.0) and -1.0 + 1.0 evaluate to +0.0
rather than -0.0 we need in this case.

I have no better ideas than to use another copysign, but it should be just
one extra instruction, as we've already previously done copysign from the
same source and thus hve already computed the x & signmask and so this patch
just adds a {,v}por of that previously computed x & signmask into the result
again.  In this case we aren't really copying it always to positive, but
all we want is handle the single problematic case when we compute positive 0
and need negative 0, for all others the oring won't really change the sign.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Or does anybody have some smarter idea?

Note, not sure if floor isn't incorrect in the round to negative infinity
case (but that would be preexisting issue).

2019-03-15  Jakub Jelinek  

PR target/89726
* config/i386/i386.c (ix86_expand_floorceildf_32): In ceil
compensation use x2 += 1 instead of x2 -= -1 and when honoring
signed zeros, do another copysign after the compensation.

* gcc.target/i386/fpprec-1.c (x): Add 6 new constants.
(expect_round, expect_rint, expect_floor, expect_ceil, expect_trunc):
Add expected results for them.

--- gcc/config/i386/i386.c.jj   2019-03-14 23:44:27.862560139 +0100
+++ gcc/config/i386/i386.c  2019-03-15 15:07:54.607453430 +0100
@@ -45563,8 +45563,10 @@ ix86_expand_floorceildf_32 (rtx operand0
   x2 -= 1;
  Compensate.  Ceil:
 if (x2 < x)
-  x2 -= -1;
-return x2;
+  x2 += 1;
+   if (HONOR_SIGNED_ZEROS (mode))
+ x2 = copysign (x2, x);
+   return x2;
*/
   machine_mode mode = GET_MODE (operand0);
   rtx xa, TWO52, tmp, one, res, mask;
@@ -45590,17 +45592,16 @@ ix86_expand_floorceildf_32 (rtx operand0
   /* xa = copysign (xa, operand1) */
   ix86_sse_copysign_to_positive (xa, xa, res, mask);
 
-  /* generate 1.0 or -1.0 */
-  one = force_reg (mode,
-  const_double_from_real_value (do_floor
-? dconst1 : dconstm1, mode));
+  /* generate 1.0 */
+  one = force_reg (mode, const_double_from_real_value (dconst1, mode));
 
   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
   emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
-  /* We always need to subtract here to preserve signed zero.  */
-  tmp = expand_simple_binop (mode, MINUS,
+  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+  if (!do_floor && HONOR_SIGNED_ZEROS (mode))
+ix86_sse_copysign_to_positive (tmp, tmp, res, mask);
   emit_move_insn (res, tmp);
 
   emit_label (label);
--- gcc/testsuite/gcc.target/i386/fpprec-1.c.jj 2010-05-21 11:46:22.0 
+0200
+++ gcc/testsuite/gcc.target/i386/fpprec-1.c2019-03-15 15:01:51.430345113 
+0100
@@ -11,6 +11,9 @@ double x[] = { __builtin_nan(""), __buil
0x1.1p-1, 0x1.fp-2,
0x1.1p+0, 0x1.fp-1,
0x1.80001p+0, 0x1.7p+0,
+   -0x1.1p-1, -0x1.fp-2,
+   -0x1.1p+0, -0x1.fp-1,
+   -0x1.80001p+0, -0x1.7p+0,
-0.0, 0.0, -0.5, 0.5, -1.0, 1.0, -1.5, 1.5, -2.0, 2.0,
-2.5, 2.5 };
 #define NUM (sizeof(x)/sizeof(double))
@@ -19,6 +22,7 @@ double expect_round[] = { __builtin_nan(
-0x1.fp+1023, 0x1.fp+1023,
-0.0, 0.0,
1.0, 0.0, 1.0, 1.0, 2.0, 1.0,
+   -1.0, -0.0, -1.0, -1.0, -2.0, -1.0,
-0.0, 0.0, -1.0, 1.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0,
-3.0, 3.0 };
 
@@ -26,6 +30,7 @@ double expect_rint[] = { __builtin_nan("
 -0x1.fp+1023, 0x1.fp+1023,
 -0.0, 0.0,
 1.0, 0.0, 1.0, 1.0, 2.0, 1.0,
+-1.0, -0.0, -1.0, -1.0, -2.0, -1.0,
 -0.0, 0.0, -0.0, 0.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0,
 -2.0, 2.0 };
 
@@ -33,6 +38,7 @@ double expect_floor[] = { __builtin_nan(
 -0x1.fp+1023, 0x1.fp+1023,
 -1.0, 0.0,
 0.0, 0.0, 1.0, 0.0, 1.0, 1.0,
+-1.0, -1.0, -2.0, -1.0, -2.0, -2.0,
 -0.0, 0.0, -1.0, 0.0, -1.0, 1.0, -2.0, 1.0, -2.0, 2.0,
 -3.0, 2.0 };
 
@@ -40,6 +46,7 @@ double expect_ceil[] = { __builtin_nan("
 -0x1.fp+1023, 0x1.fp+1023,

Re: [PATCH] Fix i?86 -mfpmath=sse ceil inline expansion (PR target/89726)

2019-03-19 Thread Uros Bizjak
On Fri, Mar 15, 2019 at 9:46 PM Jakub Jelinek  wrote:
>
> Hi!
>
> As the enhanced testcase shows, ix86_expand_floorceildf_32 doesn't emit
> correct ceil when honoring signed zeros, because it performs copysign,
> then comparison and then based on the comparison an optional addition of 1.
> The comment claims that it is essential to use x2 -= -1.0 instead of
> x2 += 1.0, but in reality we emit x2 += 1.0 anyway and it makes no
> difference on the sign of result if it is zero, as (unless rounding to
> negative infinity) both -1.0 - (-1.0) and -1.0 + 1.0 evaluate to +0.0
> rather than -0.0 we need in this case.
>
> I have no better ideas than to use another copysign, but it should be just
> one extra instruction, as we've already previously done copysign from the
> same source and thus hve already computed the x & signmask and so this patch
> just adds a {,v}por of that previously computed x & signmask into the result
> again.  In this case we aren't really copying it always to positive, but
> all we want is handle the single problematic case when we compute positive 0
> and need negative 0, for all others the oring won't really change the sign.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> Or does anybody have some smarter idea?
>
> Note, not sure if floor isn't incorrect in the round to negative infinity
> case (but that would be preexisting issue).
>
> 2019-03-15  Jakub Jelinek  
>
> PR target/89726
> * config/i386/i386.c (ix86_expand_floorceildf_32): In ceil
> compensation use x2 += 1 instead of x2 -= -1 and when honoring
> signed zeros, do another copysign after the compensation.
>
> * gcc.target/i386/fpprec-1.c (x): Add 6 new constants.
> (expect_round, expect_rint, expect_floor, expect_ceil, expect_trunc):
> Add expected results for them.

LGTM.

Thanks,
Uros.

> --- gcc/config/i386/i386.c.jj   2019-03-14 23:44:27.862560139 +0100
> +++ gcc/config/i386/i386.c  2019-03-15 15:07:54.607453430 +0100
> @@ -45563,8 +45563,10 @@ ix86_expand_floorceildf_32 (rtx operand0
>x2 -= 1;
>   Compensate.  Ceil:
>  if (x2 < x)
> -  x2 -= -1;
> -return x2;
> +  x2 += 1;
> +   if (HONOR_SIGNED_ZEROS (mode))
> + x2 = copysign (x2, x);
> +   return x2;
> */
>machine_mode mode = GET_MODE (operand0);
>rtx xa, TWO52, tmp, one, res, mask;
> @@ -45590,17 +45592,16 @@ ix86_expand_floorceildf_32 (rtx operand0
>/* xa = copysign (xa, operand1) */
>ix86_sse_copysign_to_positive (xa, xa, res, mask);
>
> -  /* generate 1.0 or -1.0 */
> -  one = force_reg (mode,
> -  const_double_from_real_value (do_floor
> -? dconst1 : dconstm1, mode));
> +  /* generate 1.0 */
> +  one = force_reg (mode, const_double_from_real_value (dconst1, mode));
>
>/* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
>tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
>emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
> -  /* We always need to subtract here to preserve signed zero.  */
> -  tmp = expand_simple_binop (mode, MINUS,
> +  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
>  xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
> +  if (!do_floor && HONOR_SIGNED_ZEROS (mode))
> +ix86_sse_copysign_to_positive (tmp, tmp, res, mask);
>emit_move_insn (res, tmp);
>
>emit_label (label);
> --- gcc/testsuite/gcc.target/i386/fpprec-1.c.jj 2010-05-21 11:46:22.0 
> +0200
> +++ gcc/testsuite/gcc.target/i386/fpprec-1.c2019-03-15 15:01:51.430345113 
> +0100
> @@ -11,6 +11,9 @@ double x[] = { __builtin_nan(""), __buil
> 0x1.1p-1, 0x1.fp-2,
> 0x1.1p+0, 0x1.fp-1,
> 0x1.80001p+0, 0x1.7p+0,
> +   -0x1.1p-1, -0x1.fp-2,
> +   -0x1.1p+0, -0x1.fp-1,
> +   -0x1.80001p+0, -0x1.7p+0,
> -0.0, 0.0, -0.5, 0.5, -1.0, 1.0, -1.5, 1.5, -2.0, 2.0,
> -2.5, 2.5 };
>  #define NUM (sizeof(x)/sizeof(double))
> @@ -19,6 +22,7 @@ double expect_round[] = { __builtin_nan(
> -0x1.fp+1023, 0x1.fp+1023,
> -0.0, 0.0,
> 1.0, 0.0, 1.0, 1.0, 2.0, 1.0,
> +   -1.0, -0.0, -1.0, -1.0, -2.0, -1.0,
> -0.0, 0.0, -1.0, 1.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0,
> -3.0, 3.0 };
>
> @@ -26,6 +30,7 @@ double expect_rint[] = { __builtin_nan("
>  -0x1.fp+1023, 0x1.fp+1023,
>  -0.0, 0.0,
>  1.0, 0.0, 1.0, 1.0, 2.0, 1.0,
> +-1.0, -0.0, -1.0, -1.0, -2.0, -1.0,
>  -0.0, 0.0, -0.0, 0.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0,
>  -2.0, 2.0 };
>
> @@ -33,6 +38,7 @@ double expect_floor[] = { __builtin_nan(
>  -0x1.fp+1023, 0x1.fp+1023,
>  -1.0, 0.0,
>  0.0, 0.