RE: [PATCH]middle-end convert negate + right shift into compare greater.

2021-11-03 Thread Tamar Christina via Gcc-patches
Hi,

I have addressed all the feedback and updated patch attached:

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/signbit-2.c: New test.
* gcc.dg/signbit-3.c: New test.
* gcc.dg/signbit-4.c: New test.
* gcc.dg/signbit-5.c: New test.
* gcc.dg/signbit-6.c: New test.
* gcc.target/aarch64/signbit-1.c: New test.

--- inline copy of patch ---

diff --git a/gcc/match.pd b/gcc/match.pd
index 65a6591f75c0602147bbdf6d59f9ccd4b1e5..fe93500d22e239c8c9faf4c58cee95dec7f9 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -38,7 +38,8 @@ along with GCC; see the file COPYING3.  If not see
uniform_integer_cst_p
HONOR_NANS
uniform_vector_p
-   bitmask_inv_cst_vector_p)
+   bitmask_inv_cst_vector_p
+   expand_vec_cmp_expr_p)
 
 /* Operator lists.  */
 (define_operator_list tcc_comparison
@@ -832,6 +833,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 { tree utype = unsigned_type_for (type); }
 (convert (rshift (lshift (convert:utype @0) @2) @3))
 
+/* Fold (-x >> C) into -(x > 0) where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)
+ (simplify
+  (rshift (negate:s @0) cst@1)
+   (if (!TYPE_UNSIGNED (type)
+&& TYPE_OVERFLOW_UNDEFINED (type))
+(with { tree stype = TREE_TYPE (@1);
+   tree bt = truth_type_for (type);
+   tree zeros = build_zero_cst (type); }
+ (switch
+  /* Handle scalar case.  */
+  (if (INTEGRAL_TYPE_P (type)
+  /* If we apply the rule to the scalar type before vectorization
+ we will enforce the result of the comparison being a bool
+ which will require an extra AND on the result that will be
+ indistinguishable from when the user did actually want 0
+ or 1 as the result so it can't be removed.  */
+  && canonicalize_math_after_vectorization_p ()
+  && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (type) - 1))
+   (negate (convert (gt @0 { zeros; }
+  /* Handle vector case.  */
+  (if (VECTOR_INTEGER_TYPE_P (type)
+  /* First check whether the target has the same mode for vector
+ comparison results as its operands do.  */
+  && TYPE_MODE (bt) == TYPE_MODE (type)
+  /* Then check to see if the target is able to expand the comparison
+ with the given type later on, otherwise we may ICE.  */
+  && expand_vec_cmp_expr_p (type, bt, { GT_EXPR }))
+   (with { tree cst = uniform_integer_cst_p (@1); }
+   (if (cst && wi::eq_p (wi::to_wide (cst), element_precision (type) - 1))
+(view_convert (gt:bt @0 { zeros; }))
+
 /* Fold (C1/X)*C2 into (C1*C2)/X.  */
 (simplify
  (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
new file mode 100644
index ..fc0157cbc5c7996b481f2998bc30176c96a669bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c
new file mode 100644
index ..19e9c06c349b3287610f817628f00938ece60bf7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-3.c
@@ -0,0 +1,13 @@
+/* { dg-do assemble } */
+/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-4.c b/gcc/testsuite/gcc.dg/signbit-4.c
new file mode 100644
index ..bc459ba60a760bdf49e94dbec762f378c24fe9b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-4.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -fwrapv" } */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#ifndef N
+#define N 65
+#endif
+
+#ifndef TYPE
+#define TYPE int32_t
+#endif
+
+#ifndef DEBUG
+#define DEBUG 1
+#endif
+
+#define BASE ((TYPE) -1 < 0 ? -126 : 4)
+
+__attribute__ ((noinline, noipa))
+void fun1(TYPE *x, int n)
+{
+for (int i = 0; i < n; i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+__attribute__ ((noinline, noipa, optimize("O0")))
+void fun2(TYPE *x, int n)
+{
+for (int

RE: [PATCH]middle-end convert negate + right shift into compare greater.

2021-11-04 Thread Richard Biener via Gcc-patches
On Wed, 3 Nov 2021, Tamar Christina wrote:

> Hi,
> 
> I have addressed all the feedback and updated patch attached:
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> x86_64-pc-linux-gnu and no regressions.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * match.pd: New negate+shift pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/signbit-2.c: New test.
>   * gcc.dg/signbit-3.c: New test.
>   * gcc.dg/signbit-4.c: New test.
>   * gcc.dg/signbit-5.c: New test.
>   * gcc.dg/signbit-6.c: New test.
>   * gcc.target/aarch64/signbit-1.c: New test.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 65a6591f75c0602147bbdf6d59f9ccd4b1e5..fe93500d22e239c8c9faf4c58cee95dec7f9 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -38,7 +38,8 @@ along with GCC; see the file COPYING3.  If not see
> uniform_integer_cst_p
> HONOR_NANS
> uniform_vector_p
> -   bitmask_inv_cst_vector_p)
> +   bitmask_inv_cst_vector_p
> +   expand_vec_cmp_expr_p)
>  
>  /* Operator lists.  */
>  (define_operator_list tcc_comparison
> @@ -832,6 +833,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  { tree utype = unsigned_type_for (type); }
>  (convert (rshift (lshift (convert:utype @0) @2) @3))
>  
> +/* Fold (-x >> C) into -(x > 0) where C = precision(type) - 1.  */
> +(for cst (INTEGER_CST VECTOR_CST)
> + (simplify
> +  (rshift (negate:s @0) cst@1)
> +   (if (!TYPE_UNSIGNED (type)
> +&& TYPE_OVERFLOW_UNDEFINED (type))
> +(with { tree stype = TREE_TYPE (@1);
> + tree bt = truth_type_for (type);
> + tree zeros = build_zero_cst (type); }
> + (switch
> +  /* Handle scalar case.  */
> +  (if (INTEGRAL_TYPE_P (type)
> +/* If we apply the rule to the scalar type before vectorization
> +   we will enforce the result of the comparison being a bool
> +   which will require an extra AND on the result that will be
> +   indistinguishable from when the user did actually want 0
> +   or 1 as the result so it can't be removed.  */
> +&& canonicalize_math_after_vectorization_p ()
> +&& wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (type) - 1))
> +   (negate (convert (gt @0 { zeros; }
> +  /* Handle vector case.  */
> +  (if (VECTOR_INTEGER_TYPE_P (type)
> +/* First check whether the target has the same mode for vector
> +   comparison results as its operands do.  */
> +&& TYPE_MODE (bt) == TYPE_MODE (type)
> +/* Then check to see if the target is able to expand the comparison
> +   with the given type later on, otherwise we may ICE.  */
> +&& expand_vec_cmp_expr_p (type, bt, { GT_EXPR }))

No need to wrap GT_EXPR in { }

> +   (with { tree cst = uniform_integer_cst_p (@1); }

if you declare 'cst' above where you declare 'bt' you can do

  && (cst = uniform_integer_cst_p (@1)))

combining it with the if above, and the one below, simplifying indents
and flow.

OK with that change.

I guess it might happen that the scalar transform expands badly
on some targets?  Please have an eye on problems that come up.

Thanks,
Richard.

> + (if (cst && wi::eq_p (wi::to_wide (cst), element_precision (type) - 1))
> +  (view_convert (gt:bt @0 { zeros; }))
> +
>  /* Fold (C1/X)*C2 into (C1*C2)/X.  */
>  (simplify
>   (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
> diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
> new file mode 100644
> index ..fc0157cbc5c7996b481f2998bc30176c96a669bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
> +
> +#include <stdint.h>
> +
> +void fun1(int32_t *x, int n)
> +{
> +for (int i = 0; i < (n & -16); i++)
> +  x[i] = (-x[i]) >> 31;
> +}
> +
> +void fun2(int32_t *x, int n)
> +{
> +for (int i = 0; i < (n & -16); i++)
> +  x[i] = (-x[i]) >> 30;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */
> +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
> diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c
> new file mode 100644
> index ..19e9c06c349b3287610f817628f00938ece60bf7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
> +
> +#include <stdint.h>
> +
> +void fun1(int32_t *x, int n)
> +{
> +for (int i = 0; i < (n & -16); i++)
> +  x[i] = (-x[i]) >> 31;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
> +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
> diff --git a/gcc/testsuite/gcc.dg/signbit-4.c b/gcc/testsuite/gcc.dg/signbit-4.c
> new 

Re: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-05 Thread Richard Earnshaw via Gcc-patches




On 05/10/2021 13:50, Tamar Christina via Gcc-patches wrote:

Hi All,

This turns an inversion of the sign bit + arithmetic right shift into a
comparison with 0.

i.e.

void fun1(int32_t *x, int n)
{
 for (int i = 0; i < (n & -16); i++)
   x[i] = (-x[i]) >> 31;
}

Notwithstanding that I think shifting a negative value right is 
unspecified behaviour, I don't think this generates the same result when 
x[i] is INT_MIN either, although negating that is also unspecified since 
it can't be represented in an int.


R.


now generates:

.L3:
 ldr q0, [x0]
 cmgt    v0.4s, v0.4s, #0
 str q0, [x0], 16
 cmp x0, x1
 bne .L3

instead of:

.L3:
 ldr q0, [x0]
 neg v0.4s, v0.4s
 sshr    v0.4s, v0.4s, 31
 str q0, [x0], 16
 cmp x0, x1
 bne .L3

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/signbit-2.c: New test.
* gcc.dg/signbit-3.c: New test.
* gcc.target/aarch64/signbit-1.c: New test.

--- inline copy of patch --
diff --git a/gcc/match.pd b/gcc/match.pd
index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..581436fe36dbacdcb0c2720b7190c96d14398143 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -826,6 +826,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  { tree utype = unsigned_type_for (type); }
  (convert (rshift (lshift (convert:utype @0) @2) @3))
  
+/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)
+ (simplify
+  (rshift (negate:s @0) cst@1)
+   (with { tree ctype = TREE_TYPE (@0);
+  tree stype = TREE_TYPE (@1);
+  tree bt = truth_type_for (ctype); }
+(switch
+ /* Handle scalar case.  */
+ (if (INTEGRAL_TYPE_P (ctype)
+ && !VECTOR_TYPE_P (ctype)
+ && !TYPE_UNSIGNED (ctype)
+ && canonicalize_math_after_vectorization_p ()
+ && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+  (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
+ /* Handle vector case with a scalar immediate.  */
+ (if (VECTOR_INTEGER_TYPE_P (ctype)
+ && !VECTOR_TYPE_P (stype)
+ && !TYPE_UNSIGNED (ctype)
+  && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+  (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))
+ /* Handle vector case with a vector immediate.   */
+ (if (VECTOR_INTEGER_TYPE_P (ctype)
+ && VECTOR_TYPE_P (stype)
+ && !TYPE_UNSIGNED (ctype)
+ && uniform_vector_p (@1))
+  (with { tree cst = vector_cst_elt (@1, 0);
+ tree t = TREE_TYPE (cst); }
+   (if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1))
+(convert:bt (gt:bt @0 { build_zero_cst (ctype); })
+
  /* Fold (C1/X)*C2 into (C1*C2)/X.  */
  (simplify
   (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
new file mode 100644
index ..fc0157cbc5c7996b481f2998bc30176c96a669bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c
new file mode 100644
index ..19e9c06c349b3287610f817628f00938ece60bf7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-3.c
@@ -0,0 +1,13 @@
+/* { dg-do assemble } */
+/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
new file mode 100644
index ..3ebfb0586f37de29cf58635b27fe48503714447e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-a

RE: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-05 Thread Tamar Christina via Gcc-patches


> -----Original Message-----
> From: Richard Earnshaw 
> Sent: Tuesday, October 5, 2021 1:56 PM
> To: Tamar Christina ; gcc-patches@gcc.gnu.org
> Cc: nd ; rguent...@suse.de
> Subject: Re: [PATCH]middle-end convert negate + right shift into compare
> greater.
> 
> 
> 
> On 05/10/2021 13:50, Tamar Christina via Gcc-patches wrote:
> > Hi All,
> >
> > This turns an inversion of the sign bit + arithmetic right shift into
> > a comparison with 0.
> >
> > i.e.
> >
> > void fun1(int32_t *x, int n)
> > {
> >  for (int i = 0; i < (n & -16); i++)
> >x[i] = (-x[i]) >> 31;
> > }
> >
> Notwithstanding that I think shifting a negative value right is unspecified
> behaviour, I don't think this generates the same result when x[i] is INT_MIN
> either, although negating that is also unspecified since it can't be
> represented in an int.
> 

You're right that they are implementation defined, but I think most ISAs do
have a sane implementation of these two cases. At least both x86 and AArch64
just replicate the sign bit, and for negate they do two's-complement
negation. So INT_MIN works as expected and results in 0.
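
Concretely (a minimal standalone check, assuming 32-bit int; an
illustration only, not part of the patch):

  #include <stdint.h>
  #include <stdio.h>

  int main (void)
  {
    int32_t x = -8;
    /* The shifts are implementation-defined in C, but GCC sign-extends
       on both x86 and AArch64, so this prints -2 and -1.  */
    printf ("%d %d\n", x >> 2, x >> 31);
    return 0;
  }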

But I'm happy to guard this behind some sort of target guard.

Regards,
Tamar

> R.
> 
> > now generates:
> >
> > .L3:
> >  ldr q0, [x0]
> >  cmgt    v0.4s, v0.4s, #0
> >  str q0, [x0], 16
> >  cmp x0, x1
> >  bne .L3
> >
> > instead of:
> >
> > .L3:
> >  ldr q0, [x0]
> >  neg v0.4s, v0.4s
> >  sshr    v0.4s, v0.4s, 31
> >  str q0, [x0], 16
> >  cmp x0, x1
> >  bne .L3
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu,
> > x86_64-pc-linux-gnu and no regressions.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > * match.pd: New negate+shift pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/signbit-2.c: New test.
> > * gcc.dg/signbit-3.c: New test.
> > * gcc.target/aarch64/signbit-1.c: New test.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..581436fe36dbacdcb0c2720b7190c96d14398143 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -826,6 +826,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   { tree utype = unsigned_type_for (type); }
> >   (convert (rshift (lshift (convert:utype @0) @2) @3))
> >
> > +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
> > +(for cst (INTEGER_CST VECTOR_CST)
> > + (simplify
> > +  (rshift (negate:s @0) cst@1)
> > +   (with { tree ctype = TREE_TYPE (@0);
> > +  tree stype = TREE_TYPE (@1);
> > +  tree bt = truth_type_for (ctype); }
> > +(switch
> > + /* Handle scalar case.  */
> > + (if (INTEGRAL_TYPE_P (ctype)
> > + && !VECTOR_TYPE_P (ctype)
> > + && !TYPE_UNSIGNED (ctype)
> > + && canonicalize_math_after_vectorization_p ()
> > + && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
> > +  (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
> > + /* Handle vector case with a scalar immediate.  */
> > + (if (VECTOR_INTEGER_TYPE_P (ctype)
> > + && !VECTOR_TYPE_P (stype)
> > + && !TYPE_UNSIGNED (ctype)
> > +  && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
> > +  (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))
> > + /* Handle vector case with a vector immediate.   */
> > + (if (VECTOR_INTEGER_TYPE_P (ctype)
> > + && VECTOR_TYPE_P (stype)
> > + && !TYPE_UNSIGNED (ctype)
> > + && uniform_vector_p (@1))
> > +  (with { tree cst = vector_cst_elt (@1, 0);
> > + tree t = TREE_TYPE (cst); }
> > +   (if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1))
> > +(convert:bt (gt:bt @0 { build_zero_cst (ctype); })
> > +
> >   /* Fold (C1/X)*C2 into (C1*C2)/X.  */
> >   (simplify
> >(mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
> > diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
> > new file mode 100644
> > index ..fc0157cbc5c7996b481f2998bc30176c96a669bb
> > --- /dev/null
> &g

Re: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-05 Thread Richard Earnshaw via Gcc-patches




On 05/10/2021 14:30, Tamar Christina wrote:




-----Original Message-----
From: Richard Earnshaw 
Sent: Tuesday, October 5, 2021 1:56 PM
To: Tamar Christina ; gcc-patches@gcc.gnu.org
Cc: nd ; rguent...@suse.de
Subject: Re: [PATCH]middle-end convert negate + right shift into compare
greater.



On 05/10/2021 13:50, Tamar Christina via Gcc-patches wrote:

Hi All,

This turns an inversion of the sign bit + arithmetic right shift into
a comparison with 0.

i.e.

void fun1(int32_t *x, int n)
{
  for (int i = 0; i < (n & -16); i++)
x[i] = (-x[i]) >> 31;
}


Notwithstanding that I think shifting a negative value right is unspecified
behaviour, I don't think this generates the same result when x[i] is INT_MIN
either, although negating that is also unspecified since it can't be
represented in an int.



You're right that they are implementation defined, but I think most ISAs do
have a sane implementation of these two cases. At least both x86 and AArch64
just replicate the sign bit, and for negate they do two's-complement
negation. So INT_MIN works as expected and results in 0.


Which is not what the original code produces if you have wrapping ints, 
because -INT_MIN is INT_MIN, and thus still negative.
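
A minimal illustration of the divergence (assuming 32-bit two's complement
and compilation with -fwrapv):

  #include <limits.h>
  #include <stdio.h>

  int main (void)
  {
    int x = INT_MIN;
    /* With -fwrapv the negation wraps: -INT_MIN == INT_MIN, so the
       shift form still sees a negative value and yields -1...  */
    int shifted = (-x) >> 31;
    /* ...while the transformed form tests the sign directly and
       yields 0, because INT_MIN > 0 is false.  */
    int compared = -(x > 0);
    printf ("%d %d\n", shifted, compared);  /* prints: -1 0 */
    return 0;
  }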


R.



But I'm happy to guard this behind some sort of target guard.

Regards,
Tamar


R.


now generates:

.L3:
  ldr q0, [x0]
  cmgt    v0.4s, v0.4s, #0
  str q0, [x0], 16
  cmp x0, x1
  bne .L3

instead of:

.L3:
  ldr q0, [x0]
  neg v0.4s, v0.4s
  sshr    v0.4s, v0.4s, 31
  str q0, [x0], 16
  cmp x0, x1
  bne .L3

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/signbit-2.c: New test.
* gcc.dg/signbit-3.c: New test.
* gcc.target/aarch64/signbit-1.c: New test.

--- inline copy of patch --
diff --git a/gcc/match.pd b/gcc/match.pd
index

7d2a24dbc5e9644a09968f877e12a824d8ba1caa..581436fe36dbacdcb0c2720b7
190c96d14398143 100644

--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -826,6 +826,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   { tree utype = unsigned_type_for (type); }
   (convert (rshift (lshift (convert:utype @0) @2) @3))

+/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)
+ (simplify
+  (rshift (negate:s @0) cst@1)
+   (with { tree ctype = TREE_TYPE (@0);
+  tree stype = TREE_TYPE (@1);
+  tree bt = truth_type_for (ctype); }
+(switch
+ /* Handle scalar case.  */
+ (if (INTEGRAL_TYPE_P (ctype)
+ && !VECTOR_TYPE_P (ctype)
+ && !TYPE_UNSIGNED (ctype)
+ && canonicalize_math_after_vectorization_p ()
+ && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+  (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
+ /* Handle vector case with a scalar immediate.  */
+ (if (VECTOR_INTEGER_TYPE_P (ctype)
+ && !VECTOR_TYPE_P (stype)
+ && !TYPE_UNSIGNED (ctype)
+  && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+  (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))
+ /* Handle vector case with a vector immediate.   */
+ (if (VECTOR_INTEGER_TYPE_P (ctype)
+ && VECTOR_TYPE_P (stype)
+ && !TYPE_UNSIGNED (ctype)
+ && uniform_vector_p (@1))
+  (with { tree cst = vector_cst_elt (@1, 0);
+ tree t = TREE_TYPE (cst); }
+   (if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1))
+(convert:bt (gt:bt @0 { build_zero_cst (ctype); })
+
   /* Fold (C1/X)*C2 into (C1*C2)/X.  */
   (simplify
(mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
new file mode 100644
index ..fc0157cbc5c7996b481f2998bc30176c96a669bb

--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c
new file mode 100644
index ..19e9c06c349b3287610f817628f00938ece60bf7

--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-3.c
@@

RE: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-05 Thread Tamar Christina via Gcc-patches
> -----Original Message-----
> From: Richard Earnshaw 
> Sent: Tuesday, October 5, 2021 2:34 PM
> To: Tamar Christina ; gcc-patches@gcc.gnu.org
> Cc: nd ; rguent...@suse.de
> Subject: Re: [PATCH]middle-end convert negate + right shift into compare
> greater.
> 
> 
> 
> On 05/10/2021 14:30, Tamar Christina wrote:
> >
> >
> >> -----Original Message-----
> >> From: Richard Earnshaw 
> >> Sent: Tuesday, October 5, 2021 1:56 PM
> >> To: Tamar Christina ;
> >> gcc-patches@gcc.gnu.org
> >> Cc: nd ; rguent...@suse.de
> >> Subject: Re: [PATCH]middle-end convert negate + right shift into
> >> compare greater.
> >>
> >>
> >>
> >> On 05/10/2021 13:50, Tamar Christina via Gcc-patches wrote:
> >>> Hi All,
> >>>
> >>> This turns an inversion of the sign bit + arithmetic right shift
> >>> into a comparison with 0.
> >>>
> >>> i.e.
> >>>
> >>> void fun1(int32_t *x, int n)
> >>> {
> >>>   for (int i = 0; i < (n & -16); i++)
> >>> x[i] = (-x[i]) >> 31;
> >>> }
> >>>
> >> Notwithstanding that I think shifting a negative value right is
> >> unspecified behaviour, I don't think this generates the same result
> >> when x[i] is INT_MIN either, although negating that is also
> >> unspecified since it can't be represented in an int.
> >>
> >
> > You're right that they are implementation defined, but I think most
> > ISAs do have a sane implementation of these two cases. At least both
> > x86 and AArch64 just replicate the sign bit and for negate do
> > two's-complement negation. So INT_MIN works as expected and results in 0.
> 
> Which is not what the original code produces if you have wrapping ints,
> because -INT_MIN is INT_MIN, and thus still negative.
> 

True, but then you have a signed overflow, which is undefined behaviour and
not implementation defined:

" If an exceptional condition occurs during the evaluation of an expression 
(that is, if the result is not mathematically defined or not in the range of 
representable values for its type), the behavior is undefined."

So it should still be acceptable to do in this case.

> R.
> 
> >
> > But I'm happy to guard this behind some sort of target guard.
> >
> > Regards,
> > Tamar
> >
> >> R.
> >>
> >>> now generates:
> >>>
> >>> .L3:
> >>>   ldr q0, [x0]
> >>>   cmgt    v0.4s, v0.4s, #0
> >>>   str q0, [x0], 16
> >>>   cmp x0, x1
> >>>   bne .L3
> >>>
> >>> instead of:
> >>>
> >>> .L3:
> >>>   ldr q0, [x0]
> >>>   neg v0.4s, v0.4s
> >>>   sshr    v0.4s, v0.4s, 31
> >>>   str q0, [x0], 16
> >>>   cmp x0, x1
> >>>   bne .L3
> >>>
> >>> Bootstrapped Regtested on aarch64-none-linux-gnu,
> >>> x86_64-pc-linux-gnu and no regressions.
> >>>
> >>> Ok for master?
> >>>
> >>> Thanks,
> >>> Tamar
> >>>
> >>> gcc/ChangeLog:
> >>>
> >>>   * match.pd: New negate+shift pattern.
> >>>
> >>> gcc/testsuite/ChangeLog:
> >>>
> >>>   * gcc.dg/signbit-2.c: New test.
> >>>   * gcc.dg/signbit-3.c: New test.
> >>>   * gcc.target/aarch64/signbit-1.c: New test.
> >>>
> >>> --- inline copy of patch --
> >>> diff --git a/gcc/match.pd b/gcc/match.pd
> >>> index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..581436fe36dbacdcb0c2720b7190c96d14398143 100644
> >>> --- a/gcc/match.pd
> >>> +++ b/gcc/match.pd
> >>> @@ -826,6 +826,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >>>{ tree utype = unsigned_type_for (type); }
> >>>(convert (rshift (lshift (convert:utype @0) @2) @3))
> >>>
> >>> +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
> >>> +(for cst (INTEGER_CST VECTOR_CST)
> >>> + (simplify
> >>> +  (rshift (negate:s @0) cst@1)
> >>> +   (with { tree ctype = TREE_TYPE (@0);
> >>> +tree stype = TREE_TYPE (@1);
> >>> +tree bt = truth_type_for (ctype); }
> >>> +(switch
> >>> + /* Hand

Re: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-05 Thread Richard Earnshaw via Gcc-patches




On 05/10/2021 14:49, Tamar Christina wrote:

-----Original Message-----
From: Richard Earnshaw 
Sent: Tuesday, October 5, 2021 2:34 PM
To: Tamar Christina ; gcc-patches@gcc.gnu.org
Cc: nd ; rguent...@suse.de
Subject: Re: [PATCH]middle-end convert negate + right shift into compare
greater.



On 05/10/2021 14:30, Tamar Christina wrote:




-----Original Message-----
From: Richard Earnshaw 
Sent: Tuesday, October 5, 2021 1:56 PM
To: Tamar Christina ;
gcc-patches@gcc.gnu.org
Cc: nd ; rguent...@suse.de
Subject: Re: [PATCH]middle-end convert negate + right shift into
compare greater.



On 05/10/2021 13:50, Tamar Christina via Gcc-patches wrote:

Hi All,

This turns an inversion of the sign bit + arithmetic right shift
into a comparison with 0.

i.e.

void fun1(int32_t *x, int n)
{
   for (int i = 0; i < (n & -16); i++)
 x[i] = (-x[i]) >> 31;
}


Notwithstanding that I think shifting a negative value right is
unspecified behaviour, I don't think this generates the same result
when x[i] is INT_MIN either, although negating that is also
unspecified since it can't be represented in an int.



You're right that they are implementation defined, but I think most
ISAs do have a sane implementation of these two cases. At least both
x86 and AArch64 just replicate the sign bit and for negate do
two's-complement negation. So INT_MIN works as expected and results in 0.

Which is not what the original code produces if you have wrapping ints,
because -INT_MIN is INT_MIN, and thus still negative.



True, but then you have a signed overflow, which is undefined behaviour and
not implementation defined:

" If an exceptional condition occurs during the evaluation of an expression (that 
is, if the result is not mathematically defined or not in the range of representable 
values for its type), the behavior is undefined."

So it should still be acceptable to do in this case.


-fwrapv

R.




R.



But I'm happy to guard this behind some sort of target guard.

Regards,
Tamar


R.


now generates:

.L3:
   ldr q0, [x0]
   cmgt    v0.4s, v0.4s, #0
   str q0, [x0], 16
   cmp x0, x1
   bne .L3

instead of:

.L3:
   ldr q0, [x0]
   neg v0.4s, v0.4s
   sshrv0.4s, v0.4s, 31
   str q0, [x0], 16
   cmp x0, x1
   bne .L3

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/signbit-2.c: New test.
* gcc.dg/signbit-3.c: New test.
* gcc.target/aarch64/signbit-1.c: New test.

--- inline copy of patch --
diff --git a/gcc/match.pd b/gcc/match.pd
index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..581436fe36dbacdcb0c2720b7190c96d14398143 100644

--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -826,6 +826,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
{ tree utype = unsigned_type_for (type); }
(convert (rshift (lshift (convert:utype @0) @2) @3))

+/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)
+ (simplify
+  (rshift (negate:s @0) cst@1)
+   (with { tree ctype = TREE_TYPE (@0);
+  tree stype = TREE_TYPE (@1);
+  tree bt = truth_type_for (ctype); }
+(switch
+ /* Handle scalar case.  */
+ (if (INTEGRAL_TYPE_P (ctype)
+ && !VECTOR_TYPE_P (ctype)
+ && !TYPE_UNSIGNED (ctype)
+ && canonicalize_math_after_vectorization_p ()
+ && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+  (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
+ /* Handle vector case with a scalar immediate.  */
+ (if (VECTOR_INTEGER_TYPE_P (ctype)
+ && !VECTOR_TYPE_P (stype)
+ && !TYPE_UNSIGNED (ctype)
+  && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+  (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))
+ /* Handle vector case with a vector immediate.   */
+ (if (VECTOR_INTEGER_TYPE_P (ctype)
+ && VECTOR_TYPE_P (stype)
+ && !TYPE_UNSIGNED (ctype)
+ && uniform_vector_p (@1))
+  (with { tree cst = vector_cst_elt (@1, 0);
+ tree t = TREE_TYPE (cst); }
+   (if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1))
+(convert:bt (gt:bt @0 { build_zero_cst (ctype); })
+
/* Fold (C1/X)*C2 into (C1*C2)/X.  */
(simplify
 (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
new file mode 100644
index ..fc0157cbc5c7996b481f2998bc30176c96a669bb

--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-options

RE: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-05 Thread Tamar Christina via Gcc-patches


> -----Original Message-----
> From: Richard Earnshaw 
> Sent: Tuesday, October 5, 2021 2:52 PM
> To: Tamar Christina ; gcc-patches@gcc.gnu.org
> Cc: nd ; rguent...@suse.de
> Subject: Re: [PATCH]middle-end convert negate + right shift into compare
> greater.
> 
> 
> 
> On 05/10/2021 14:49, Tamar Christina wrote:
> >> -----Original Message-----
> >> From: Richard Earnshaw 
> >> Sent: Tuesday, October 5, 2021 2:34 PM
> >> To: Tamar Christina ;
> >> gcc-patches@gcc.gnu.org
> >> Cc: nd ; rguent...@suse.de
> >> Subject: Re: [PATCH]middle-end convert negate + right shift into
> >> compare greater.
> >>
> >>
> >>
> >> On 05/10/2021 14:30, Tamar Christina wrote:
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Richard Earnshaw 
> >>>> Sent: Tuesday, October 5, 2021 1:56 PM
> >>>> To: Tamar Christina ;
> >>>> gcc-patches@gcc.gnu.org
> >>>> Cc: nd ; rguent...@suse.de
> >>>> Subject: Re: [PATCH]middle-end convert negate + right shift into
> >>>> compare greater.
> >>>>
> >>>>
> >>>>
> >>>> On 05/10/2021 13:50, Tamar Christina via Gcc-patches wrote:
> >>>>> Hi All,
> >>>>>
> >>>>> This turns an inversion of the sign bit + arithmetic right shift
> >>>>> into a comparison with 0.
> >>>>>
> >>>>> i.e.
> >>>>>
> >>>>> void fun1(int32_t *x, int n)
> >>>>> {
> >>>>>for (int i = 0; i < (n & -16); i++)
> >>>>>  x[i] = (-x[i]) >> 31;
> >>>>> }
> >>>>>
> >>>> Notwithstanding that I think shifting a negative value right is
> >>>> unspecified behaviour, I don't think this generates the same result
> >>>> when x[i] is INT_MIN either, although negating that is also
> >>>> unspecified since it can't be represented in an int.
> >>>>
> >>>
> >>> You're right that they are implementation defined, but I think most
> >>> ISAs do have a sane implementation of these two cases. At least both
> >>> x86 and AArch64 just replicate the sign bit and for negate do
> >>> two's-complement negation. So INT_MIN works as expected and results in 0.
> >>
> >> Which is not what the original code produces if you have wrapping
> >> ints, because -INT_MIN is INT_MIN, and thus still negative.
> >>
> >
> > True, but then you have a signed overflow, which is undefined behaviour
> > and not implementation defined:
> >
> > " If an exceptional condition occurs during the evaluation of an expression
> (that is, if the result is not mathematically defined or not in the range of
> representable values for its type), the behavior is undefined."
> >
> > So it should still be acceptable to do in this case.
> 
> -fwrapv

If I understand correctly, you're happy with this if I guard it on
!flag_wrapv?

Regards,
Tamar
> 
> R.
> 
> >
> >> R.
> >>
> >>>
> >>> But I'm happy to guard this behind some sort of target guard.
> >>>
> >>> Regards,
> >>> Tamar
> >>>
> >>>> R.
> >>>>
> >>>>> now generates:
> >>>>>
> >>>>> .L3:
> >>>>>ldr q0, [x0]
> >>>>> cmgt    v0.4s, v0.4s, #0
> >>>>>str q0, [x0], 16
> >>>>>cmp x0, x1
> >>>>>bne .L3
> >>>>>
> >>>>> instead of:
> >>>>>
> >>>>> .L3:
> >>>>>ldr q0, [x0]
> >>>>>neg v0.4s, v0.4s
> >>>>> sshr    v0.4s, v0.4s, 31
> >>>>>str q0, [x0], 16
> >>>>>cmp x0, x1
> >>>>>bne .L3
> >>>>>
> >>>>> Bootstrapped Regtested on aarch64-none-linux-gnu,
> >>>>> x86_64-pc-linux-gnu and no regressions.
> >>>>>
> >>>>> Ok for master?
> >>>>>
> >>>>> Thanks,
> >>>>> Tamar
> >>>>>
> >>>>> gcc/ChangeLog:
> >

Re: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-07 Thread Richard Earnshaw via Gcc-patches




On 05/10/2021 14:56, Tamar Christina via Gcc-patches wrote:




-----Original Message-----
From: Richard Earnshaw 
Sent: Tuesday, October 5, 2021 2:52 PM
To: Tamar Christina ; gcc-patches@gcc.gnu.org
Cc: nd ; rguent...@suse.de
Subject: Re: [PATCH]middle-end convert negate + right shift into compare
greater.



On 05/10/2021 14:49, Tamar Christina wrote:

-----Original Message-----
From: Richard Earnshaw 
Sent: Tuesday, October 5, 2021 2:34 PM
To: Tamar Christina ;
gcc-patches@gcc.gnu.org
Cc: nd ; rguent...@suse.de
Subject: Re: [PATCH]middle-end convert negate + right shift into
compare greater.



On 05/10/2021 14:30, Tamar Christina wrote:




-----Original Message-----
From: Richard Earnshaw 
Sent: Tuesday, October 5, 2021 1:56 PM
To: Tamar Christina ;
gcc-patches@gcc.gnu.org
Cc: nd ; rguent...@suse.de
Subject: Re: [PATCH]middle-end convert negate + right shift into
compare greater.



On 05/10/2021 13:50, Tamar Christina via Gcc-patches wrote:

Hi All,

This turns an inversion of the sign bit + arithmetic right shift
into a comparison with 0.

i.e.

void fun1(int32_t *x, int n)
{
for (int i = 0; i < (n & -16); i++)
  x[i] = (-x[i]) >> 31;
}


Notwithstanding that I think shifting a negative value right is
unspecified behaviour, I don't think this generates the same result
when x[i] is INT_MIN either, although negating that is also
unspecified since it can't be represented in an int.



You're right that they are implementation defined, but I think most
ISAs do have a sane implementation of these two cases. At least both
x86 and AArch64 just replicate the sign bit and for negate do
two's-complement negation. So INT_MIN works as expected and results in 0.

Which is not what the original code produces if you have wrapping
ints, because -INT_MIN is INT_MIN, and thus still negative.



True, but then you have a signed overflow, which is undefined behaviour
and not implementation defined:

" If an exceptional condition occurs during the evaluation of an expression

(that is, if the result is not mathematically defined or not in the range of
representable values for its type), the behavior is undefined."


So it should still be acceptable to do in this case.


-fwrapv


If I understand correctly, you're happy with this if I guard it on
!flag_wrapv?


I did some more digging.  Right shift of a negative value is IMP_DEF 
(not UNDEF - this keeps catching me out).  So yes, wrapping this with 
!wrapv would address my concern.


I've not reviewed the patch itself, though.  I've never even written a 
patch for match.pd, so don't feel qualified to do that.


R.



Regards,
Tamar


R.




R.



But I'm happy to guard this behind some sort of target guard.

Regards,
Tamar


R.


now generates:

.L3:
ldr q0, [x0]
cmgt    v0.4s, v0.4s, #0
str q0, [x0], 16
cmp x0, x1
bne .L3

instead of:

.L3:
ldr q0, [x0]
neg v0.4s, v0.4s
sshr    v0.4s, v0.4s, 31
str q0, [x0], 16
cmp x0, x1
bne .L3

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/signbit-2.c: New test.
* gcc.dg/signbit-3.c: New test.
* gcc.target/aarch64/signbit-1.c: New test.

--- inline copy of patch --
diff --git a/gcc/match.pd b/gcc/match.pd
index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..581436fe36dbacdcb0c2720b7190c96d14398143 100644

--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -826,6 +826,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 { tree utype = unsigned_type_for (type); }
 (convert (rshift (lshift (convert:utype @0) @2) @3))

+/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)
+ (simplify
+  (rshift (negate:s @0) cst@1)
+   (with { tree ctype = TREE_TYPE (@0);
+  tree stype = TREE_TYPE (@1);
+  tree bt = truth_type_for (ctype); }
+(switch
+ /* Handle scalar case.  */
+ (if (INTEGRAL_TYPE_P (ctype)
+ && !VECTOR_TYPE_P (ctype)
+ && !TYPE_UNSIGNED (ctype)
+ && canonicalize_math_after_vectorization_p ()
+ && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+  (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
+ /* Handle vector case with a scalar immediate.  */
+ (if (VECTOR_INTEGER_TYPE_P (ctype)
+ && !VECTOR_TYPE_P (stype)
+ && !TYPE_UNSIGNED (ctype)
+  && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+  (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))
+ /* Handle vector case with a vector immediate.   */
+ (if (VECTOR_INTEGER_TYPE_P (ctype)
+ &&

RE: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-11 Thread Tamar Christina via Gcc-patches
Hi all,

Here's a new version of the patch.

> >>> " If an exceptional condition occurs during the evaluation of an
> >>> expression
> >> (that is, if the result is not mathematically defined or not in the
> >> range of representable values for its type), the behavior is undefined."
> >>>
> >>> So it should still be acceptable to do in this case.
> >>
> >> -fwrapv
> >
> > If I understand correctly, you're happy with this if I guard it on
> > !flag_wrapv?
> 
> I did some more digging.  Right shift of a negative value is IMP_DEF (not
> UNDEF - this keeps catching me out).  So yes, wrapping this with !wrapv
> would address my concern.
> 
> I've not reviewed the patch itself, though.  I've never even written a patch
> for match.pd, so don't feel qualified to do that.

No problem, thanks for catching this! I'm sure one of the Richards will
review it when they have a chance.

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/signbit-2.c: New test.
* gcc.dg/signbit-3.c: New test.
* gcc.target/aarch64/signbit-1.c: New test.

--- inline copy of patch ---

diff --git a/gcc/match.pd b/gcc/match.pd
index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..3d48eda826f889483a83267409c3f278ee907b57 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -826,6 +826,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 { tree utype = unsigned_type_for (type); }
 (convert (rshift (lshift (convert:utype @0) @2) @3))
 
+/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)
+ (simplify
+  (rshift (negate:s @0) cst@1)
+   (if (!flag_wrapv)
+(with { tree ctype = TREE_TYPE (@0);
+   tree stype = TREE_TYPE (@1);
+   tree bt = truth_type_for (ctype); }
+ (switch
+  /* Handle scalar case.  */
+  (if (INTEGRAL_TYPE_P (ctype)
+  && !VECTOR_TYPE_P (ctype)
+  && !TYPE_UNSIGNED (ctype)
+  && canonicalize_math_after_vectorization_p ()
+  && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+   (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
+  /* Handle vector case with a scalar immediate.  */
+  (if (VECTOR_INTEGER_TYPE_P (ctype)
+  && !VECTOR_TYPE_P (stype)
+  && !TYPE_UNSIGNED (ctype)
+   && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+   (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))
+  /* Handle vector case with a vector immediate.   */
+  (if (VECTOR_INTEGER_TYPE_P (ctype)
+  && VECTOR_TYPE_P (stype)
+  && !TYPE_UNSIGNED (ctype)
+  && uniform_vector_p (@1))
+   (with { tree cst = vector_cst_elt (@1, 0);
+  tree t = TREE_TYPE (cst); }
+(if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1))
+ (convert:bt (gt:bt @0 { build_zero_cst (ctype); }))
+
 /* Fold (C1/X)*C2 into (C1*C2)/X.  */
 (simplify
  (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
new file mode 100644
index ..fc0157cbc5c7996b481f2998bc30176c96a669bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c
new file mode 100644
index ..19e9c06c349b3287610f817628f00938ece60bf7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-3.c
@@ -0,0 +1,13 @@
+/* { dg-do assemble } */
+/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
new file mode 100644
index ..3ebfb0586f37de29cf58635b27fe48503714447e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++

RE: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-13 Thread Richard Biener via Gcc-patches
On Mon, 11 Oct 2021, Tamar Christina wrote:

> Hi all,
> 
> Here's a new version of the patch.
> 
> > >>> " If an exceptional condition occurs during the evaluation of an
> > >>> expression
> > >> (that is, if the result is not mathematically defined or not in the
> > >> range of representable values for its type), the behavior is undefined."
> > >>>
> > >>> So it should still be acceptable to do in this case.
> > >>
> > >> -fwrapv
> > >
> > > If I understand correctly, you're happy with this is I guard it on ! 
> > > flag_wrapv ?
> > 
> > I did some more digging.  Right shift of a negative value is IMP_DEF (not
> > UNDEF - this keeps catching me out).  So yes, wrapping this with !wrapv
> > would address my concern.
> > 
> > I've not reviewed the patch itself, though.  I've never even written a patch
> > for match.pd, so don't feel qualified to do that.
> 
> No problem, thanks for catching this! I'm sure one of the Richards will
> review it when they have a chance.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> x86_64-pc-linux-gnu and no regressions.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * match.pd: New negate+shift pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/signbit-2.c: New test.
>   * gcc.dg/signbit-3.c: New test.
>   * gcc.target/aarch64/signbit-1.c: New test.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..3d48eda826f889483a83267409c3f278ee907b57 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -826,6 +826,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  { tree utype = unsigned_type_for (type); }
>  (convert (rshift (lshift (convert:utype @0) @2) @3))
>  
> +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
> +(for cst (INTEGER_CST VECTOR_CST)
> + (simplify
> +  (rshift (negate:s @0) cst@1)
> +   (if (!flag_wrapv)

Don't test flag_wrapv directly, instead use the appropriate
TYPE_OVERFLOW_{UNDEFINED,WRAPS} predicates.  But I'm not sure
what we are protecting against?  Right-shift of signed integers
is implementation-defined and GCC treats it as you'd expect,
sign-extending the result.

> +(with { tree ctype = TREE_TYPE (@0);
> + tree stype = TREE_TYPE (@1);
> + tree bt = truth_type_for (ctype); }
> + (switch
> +  /* Handle scalar case.  */
> +  (if (INTEGRAL_TYPE_P (ctype)
> +&& !VECTOR_TYPE_P (ctype)
> +&& !TYPE_UNSIGNED (ctype)
> +&& canonicalize_math_after_vectorization_p ()
> +&& wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
> +   (convert:bt (gt:bt @0 { build_zero_cst (stype); })))

I'm not sure why the result is of type 'bt' rather than the
original type of the expression?

In that regard for non-vectors we'd have to add the sign
extension from unsigned bool, in the vector case we'd
hope the type of the comparison is correct.  I think in
both cases it might be convenient to use

  (cond (gt:bt @0 { build_zero_cst (ctype); }) { build_all_ones_cst (ctype); } { build_zero_cst (ctype); })

to compute the correct result and rely on (cond ..) simplifications
to simplify that if possible.
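
In C terms the gap this papers over looks like the following (hypothetical
helper functions, for illustration only; x == INT_MIN left aside):

  #include <stdint.h>

  int32_t shift_form (int32_t x) { return (-x) >> 31; }     /* 0 or -1 */
  int32_t bool_form  (int32_t x) { return x > 0; }          /* 0 or 1  */
  int32_t cond_form  (int32_t x) { return x > 0 ? -1 : 0; } /* 0 or -1 */

i.e. the bare comparison result still needs to be sign-extended to match
the shift form.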

Btw, 'stype' should be irrelevant - you need to look at
the precision of 'ctype', no?

Richard.

> +  /* Handle vector case with a scalar immediate.  */
> +  (if (VECTOR_INTEGER_TYPE_P (ctype)
> +&& !VECTOR_TYPE_P (stype)
> +&& !TYPE_UNSIGNED (ctype)
> +   && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
> +   (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))
> +  /* Handle vector case with a vector immediate.   */
> +  (if (VECTOR_INTEGER_TYPE_P (ctype)
> +&& VECTOR_TYPE_P (stype)
> +&& !TYPE_UNSIGNED (ctype)
> +&& uniform_vector_p (@1))
> +   (with { tree cst = vector_cst_elt (@1, 0);
> +tree t = TREE_TYPE (cst); }
> +(if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1))
> + (convert:bt (gt:bt @0 { build_zero_cst (ctype); }))
> +
>  /* Fold (C1/X)*C2 into (C1*C2)/X.  */
>  (simplify
>   (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
> diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
> new file mode 100644
> index ..fc0157cbc5c7996b481f2998bc30176c96a669bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
> +
> +#include <stdint.h>
> +
> +void fun1(int32_t *x, int n)
> +{
> +for (int i = 0; i < (n & -16); i++)
> +  x[i] = (-x[i]) >> 31;
> +}
> +
> +void fun2(int32_t *x, int n)
> +{
> +for (int i = 0; i < (n & -16); i++)
> +  x[i] = (-x[i]) >> 30;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */
> +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
> diff --git a/gcc/testsuite/

RE: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-15 Thread Tamar Christina via Gcc-patches
> >
> > +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
> > +(for cst (INTEGER_CST VECTOR_CST)
> > + (simplify
> > +  (rshift (negate:s @0) cst@1)
> > +   (if (!flag_wrapv)
> 
> Don't test flag_wrapv directly, instead use the appropriate
> TYPE_OVERFLOW_{UNDEFINED,WRAPS} predicates.  But I'm not sure what
> we are protecting against?  Right-shift of signed integers is implementation-
> defined and GCC treats it as you'd expect, sign-extending the result.
> 

It's protecting against the overflow of the negate on INT_MIN. When wrapping
overflows are enabled the results would be wrong.

> > +(with { tree ctype = TREE_TYPE (@0);
> > +   tree stype = TREE_TYPE (@1);
> > +   tree bt = truth_type_for (ctype); }
> > + (switch
> > +  /* Handle scalar case.  */
> > +  (if (INTEGRAL_TYPE_P (ctype)
> > +  && !VECTOR_TYPE_P (ctype)
> > +  && !TYPE_UNSIGNED (ctype)
> > +  && canonicalize_math_after_vectorization_p ()
> > +  && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
> > +   (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
> 
> I'm not sure why the result is of type 'bt' rather than the original type of 
> the
> expression?

That was to satisfy some RTL check that expected the results of comparisons
to always be a Boolean; for scalars that is logically always the case, so I
just added it for consistency.

> 
> In that regard for non-vectors we'd have to add the sign extension from
> unsigned bool, in the vector case we'd hope the type of the comparison is
> correct.  I think in both cases it might be convenient to use
> 
>   (cond (gt:bt @0 { build_zero_cst (ctype); }) { build_all_ones_cst (ctype); } { build_zero_cst (ctype); })
> 
> to compute the correct result and rely on (cond ..) simplifications to 
> simplify
> that if possible.
> 
> Btw, 'stype' should be irrelevant - you need to look at the precision of 'ctype', no?

I was working under the assumption that both input types must have the same
precision, but it turns out that assumption doesn't need to hold.
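
(For example, with an int64_t operand the shift count in (-x) >> 63 is a
plain int constant, so TREE_TYPE (@1) has precision 32 while the shifted
operand has precision 64.)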

New version attached.

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/signbit-2.c: New test.
* gcc.dg/signbit-3.c: New test.
* gcc.target/aarch64/signbit-1.c: New test.

--- inline copy of patch ---

diff --git a/gcc/match.pd b/gcc/match.pd
index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..9532cae582e152cae6e22fcce95a9744a844e3c2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -38,7 +38,8 @@ along with GCC; see the file COPYING3.  If not see
uniform_integer_cst_p
HONOR_NANS
uniform_vector_p
-   bitmask_inv_cst_vector_p)
+   bitmask_inv_cst_vector_p
+   expand_vec_cmp_expr_p)
 
 /* Operator lists.  */
 (define_operator_list tcc_comparison
@@ -826,6 +827,42 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 { tree utype = unsigned_type_for (type); }
 (convert (rshift (lshift (convert:utype @0) @2) @3))
 
+/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)
+ (simplify
+  (rshift (negate:s @0) cst@1)
+   (if (!TYPE_OVERFLOW_WRAPS (type))
+(with { tree ctype = TREE_TYPE (@0);
+   tree stype = TREE_TYPE (@1);
+   tree bt = truth_type_for (ctype);
+   tree zeros = build_zero_cst (ctype); }
+ (switch
+  /* Handle scalar case.  */
+  (if (INTEGRAL_TYPE_P (ctype)
+  && !VECTOR_TYPE_P (ctype)
+  && !TYPE_UNSIGNED (ctype)
+  && canonicalize_math_after_vectorization_p ()
+  && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (ctype) - 1))
+   (cond (gt:bt @0 { zeros; }) { build_all_ones_cst (ctype); } { zeros; }))
+  /* Handle vector case with a scalar immediate.  */
+  (if (VECTOR_INTEGER_TYPE_P (ctype)
+  && !VECTOR_TYPE_P (stype)
+  && !TYPE_UNSIGNED (ctype)
+  && expand_vec_cmp_expr_p (ctype, ctype, { GT_EXPR }))
+   (with { HOST_WIDE_INT bits = GET_MODE_UNIT_BITSIZE (TYPE_MODE (ctype)); }
+   (if (wi::eq_p (wi::to_wide (@1), bits - 1))
+(convert:bt (gt:bt @0 { zeros; })
+  /* Handle vector case with a vector immediate.   */
+  (if (VECTOR_INTEGER_TYPE_P (ctype)
+  && VECTOR_TYPE_P (stype)
+  && !TYPE_UNSIGNED (ctype)
+  && uniform_vector_p (@1)
+  && expand_vec_cmp_expr_p (ctype, ctype, { GT_EXPR }))
+   (with { tree cst = vector_cst_elt (@1, 0);
+  HOST_WIDE_INT bits = GET_MODE_UNIT_BITSIZE (TYPE_MODE (ctype)); }
+(if (wi::eq_p (wi::to_wide (cst), bits - 1))
+(convert:bt (gt:bt @0 { zeros; }))
+
 /* Fold (C1/X)*C2 into (C1*C2)/X.  */
 (simplify
  (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
new file mode 100644
index 
000

RE: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-15 Thread Richard Biener via Gcc-patches
On Fri, 15 Oct 2021, Tamar Christina wrote:

> > >
> > > +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
> > > +(for cst (INTEGER_CST VECTOR_CST)
> > > + (simplify
> > > +  (rshift (negate:s @0) cst@1)
> > > +   (if (!flag_wrapv)
> > 
> > Don't test flag_wrapv directly, instead use the appropriate
> > TYPE_OVERFLOW_{UNDEFINED,WRAPS} predicates.  But I'm not sure what
> > we are protecting against?  Right-shift of signed integers is 
> > implementation-
> > defined and GCC treats it as you'd expect, sign-extending the result.
> > 
> 
> It's protecting against the overflow of the negate on INT_MIN. When wrapping
> overflows are enabled the results would be wrong.

But -INT_MIN == INT_MIN in twos-complement so I fail to see the wrong
result?  That is, both -INT_MIN >> 31 and INT_MIN >> 31 are -1.

> > > +(with { tree ctype = TREE_TYPE (@0);
> > > + tree stype = TREE_TYPE (@1);
> > > + tree bt = truth_type_for (ctype); }
> > > + (switch
> > > +  /* Handle scalar case.  */
> > > +  (if (INTEGRAL_TYPE_P (ctype)
> > > +&& !VECTOR_TYPE_P (ctype)
> > > +&& !TYPE_UNSIGNED (ctype)
> > > +&& canonicalize_math_after_vectorization_p ()
> > > +&& wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
> > > +   (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
> > 
> > I'm not sure why the result is of type 'bt' rather than the original type 
> > of the
> > expression?
> 
> That was to satisfy some RTL check that expected the results of comparisons
> to always be a Boolean; for scalars that is logically always the case, so I
> just added it for consistency.
> 
> > 
> > In that regard for non-vectors we'd have to add the sign extension from
> > unsigned bool, in the vector case we'd hope the type of the comparison is
> > correct.  I think in both cases it might be convenient to use
> > 
> >   (cond (gt:bt @0 { build_zero_cst (ctype); }) { build_all_ones_cst (ctype); } { build_zero_cst (ctype); })
> > 
> > to compute the correct result and rely on (cond ..) simplifications to 
> > simplify
> > that if possible.
> > 
> > Btw, 'stype' should be irrelevant - you need to look at the precision of 
> > 'ctype',
> > no?
> 
> I was working under the assumption that both input types must have the same
> precision, but it turns out that assumption doesn't need to hold.
> 
> New version attached.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> x86_64-pc-linux-gnu and no regressions.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * match.pd: New negate+shift pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/signbit-2.c: New test.
>   * gcc.dg/signbit-3.c: New test.
>   * gcc.target/aarch64/signbit-1.c: New test.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..9532cae582e152cae6e22fcce95a9744a844e3c2 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -38,7 +38,8 @@ along with GCC; see the file COPYING3.  If not see
> uniform_integer_cst_p
> HONOR_NANS
> uniform_vector_p
> -   bitmask_inv_cst_vector_p)
> +   bitmask_inv_cst_vector_p
> +   expand_vec_cmp_expr_p)
>  
>  /* Operator lists.  */
>  (define_operator_list tcc_comparison
> @@ -826,6 +827,42 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  { tree utype = unsigned_type_for (type); }
>  (convert (rshift (lshift (convert:utype @0) @2) @3))
>  
> +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
> +(for cst (INTEGER_CST VECTOR_CST)
> + (simplify
> +  (rshift (negate:s @0) cst@1)
> +   (if (!TYPE_OVERFLOW_WRAPS (type))

As said, I don't think that's necessary, but at least it's now
written correctly ;)

> +(with { tree ctype = TREE_TYPE (@0);

Instead of 'ctype' you can use 'type', since the type of the expression
is the same as that of @0.

> + tree stype = TREE_TYPE (@1);
> + tree bt = truth_type_for (ctype);
> + tree zeros = build_zero_cst (ctype); }
> + (switch
> +  /* Handle scalar case.  */
> +  (if (INTEGRAL_TYPE_P (ctype)
> +&& !VECTOR_TYPE_P (ctype)

INTEGRAL_TYPE_P does not include VECTOR_TYPE_P.
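
For reference, the macro (quoted from gcc/tree.h from memory, so treat the
exact form as approximate) accepts only the scalar integer type codes:

/* gcc/tree.h: true only for scalar integer-like type codes, so a
   VECTOR_TYPE can never satisfy it and the extra check is redundant.  */
#define INTEGRAL_TYPE_P(TYPE)			\
  (TREE_CODE (TYPE) == ENUMERAL_TYPE		\
   || TREE_CODE (TYPE) == BOOLEAN_TYPE		\
   || TREE_CODE (TYPE) == INTEGER_TYPE)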

> +&& !TYPE_UNSIGNED (ctype)
> +&& canonicalize_math_after_vectorization_p ()
> +&& wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (ctype) - 1))
> +   (cond (gt:bt @0 { zeros; }) { build_all_ones_cst (ctype); } { zeros; }))
> +  /* Handle vector case with a scalar immediate.  */
> +  (if (VECTOR_INTEGER_TYPE_P (ctype)
> +&& !VECTOR_TYPE_P (stype)
> +&& !TYPE_UNSIGNED (ctype)
> +&& expand_vec_cmp_expr_p (ctype, ctype, { GT_EXPR }))
> +   (with { HOST_WIDE_INT bits = GET_MODE_UNIT_BITSIZE (TYPE_MODE (ctype)); }
> +    (if (wi::eq_p (wi::to_wide (@1), bits - 1))

You can use element_precision (@0) - 1 in both the scalar and vector case.
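
Applied to the pattern, that collapses the two precision checks into one
guard along these lines (a sketch of the suggestion, not the committed
form; the vector-immediate arm would still need to extract the scalar
constant first, e.g. via uniform_integer_cst_p):

  /* element_precision is TYPE_PRECISION for scalars and the per-element
     precision for vectors, so one guard serves both arms.  */
  && wi::eq_p (wi::to_wide (@1), element_precision (@0) - 1)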

> +  (convert:bt (gt:bt @0 { zeros; })
> +  /* Handle vector case with a vector immediate.  */
> +  (if (VECTOR_INTEGER_TYPE_P (ctype)
> +   && VECTOR_TYPE_P (stype)

Re: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-15 Thread Richard Earnshaw via Gcc-patches

On 15/10/2021 10:06, Richard Biener via Gcc-patches wrote:
> On Fri, 15 Oct 2021, Tamar Christina wrote:
>
>>>> +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
>>>> +(for cst (INTEGER_CST VECTOR_CST)
>>>> + (simplify
>>>> +  (rshift (negate:s @0) cst@1)
>>>> +   (if (!flag_wrapv)
>>>
>>> Don't test flag_wrapv directly, instead use the appropriate
>>> TYPE_OVERFLOW_{UNDEFINED,WRAPS} predicates.  But I'm not sure what
>>> we are protecting against?  Right-shift of signed integers is
>>> implementation-defined and GCC treats it as you'd expect,
>>> sign-extending the result.
>>
>> It's protecting against the overflow of the negate on INT_MIN. When
>> wrapping overflows are enabled the results would be wrong.
>
> But -INT_MIN == INT_MIN in two's complement, so I fail to see the wrong
> result?  That is, both -INT_MIN >> 31 and INT_MIN >> 31 are -1.

Exactly, so transforming the original testcase from (x = -a >> 31) into
(x = -(a > 0)) is not valid in that case.
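
A minimal sketch of that failure, assuming 32-bit int and -fwrapv (so the
negation is well-defined and wraps):

#include <stdio.h>
#include <limits.h>

int main (void)
{
  int a = INT_MIN;
  int before = (-a) >> 31;  /* -a wraps to INT_MIN; shifting gives -1.  */
  int after  = -(a > 0);    /* INT_MIN > 0 is false, so this is 0.  */
  printf ("%d %d\n", before, after);  /* -1 vs 0: the fold is invalid.  */
  return 0;
}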


R.





Re: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-15 Thread Richard Biener via Gcc-patches
On Fri, 15 Oct 2021, Richard Earnshaw wrote:

> On 15/10/2021 10:06, Richard Biener via Gcc-patches wrote:
> > On Fri, 15 Oct 2021, Tamar Christina wrote:
> > 
> 
> >>>> +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
> >>>> +(for cst (INTEGER_CST VECTOR_CST)
> >>>> + (simplify
> >>>> +  (rshift (negate:s @0) cst@1)
> >>>> +   (if (!flag_wrapv)
> >>>
> >>> Don't test flag_wrapv directly, instead use the appropriate
> >>> TYPE_OVERFLOW_{UNDEFINED,WRAPS} predicates.  But I'm not sure what
> >>> we are protecting against?  Right-shift of signed integers is
> >>> implementation-defined and GCC treats it as you'd expect,
> >>> sign-extending the result.
> >>>
> >>
> >> It's protecting against the overflow of the negate on INT_MIN. When
> >> wrapping overflows are enabled the results would be wrong.
> > 
> > But -INT_MIN == INT_MIN in two's complement, so I fail to see the wrong
> > result?  That is, both -INT_MIN >> 31 and INT_MIN >> 31 are -1.
> 
> Exactly, so transforming the original testcase from (x = -a >> 31) into (x =
> -(a > 0)) is not valid in that case.

Hmm, but we're not doing that.  Actually, we inconsistently handle
the scalar and the vector variant here - maybe the (negate ..)
is missing around the (gt @0 { ...}) of the scalar case.

Btw, I would appreciate testcases for the cases that would go wrong;
indeed, INT_MIN would be handled wrong.
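
Such a testcase might look roughly like the following sketch (styled after
the existing gcc.dg tests; the exact options are an assumption, with
-fwrapv forcing the wrapping semantics in question):

/* { dg-do run } */
/* { dg-options "-O2 -fwrapv" } */

#include <limits.h>

__attribute__ ((noipa))
int f (int a)
{
  return (-a) >> 31;
}

int main (void)
{
  /* With wrapping negation, (-INT_MIN) >> 31 must remain -1; folding to
     -(a > 0) would return 0 here.  */
  if (f (INT_MIN) != -1)
    __builtin_abort ();
  return 0;
}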

Richard.



