On 05/10/2021 14:49, Tamar Christina wrote:
-----Original Message-----
From: Richard Earnshaw <richard.earns...@foss.arm.com>
Sent: Tuesday, October 5, 2021 2:34 PM
To: Tamar Christina <tamar.christ...@arm.com>; gcc-patches@gcc.gnu.org
Cc: nd <n...@arm.com>; rguent...@suse.de
Subject: Re: [PATCH]middle-end convert negate + right shift into compare
greater.



On 05/10/2021 14:30, Tamar Christina wrote:


-----Original Message-----
From: Richard Earnshaw <richard.earns...@foss.arm.com>
Sent: Tuesday, October 5, 2021 1:56 PM
To: Tamar Christina <tamar.christ...@arm.com>;
gcc-patches@gcc.gnu.org
Cc: nd <n...@arm.com>; rguent...@suse.de
Subject: Re: [PATCH]middle-end convert negate + right shift into
compare greater.



On 05/10/2021 13:50, Tamar Christina via Gcc-patches wrote:
Hi All,

This turns an inversion of the sign bit + arithmetic right shift
into a comparison with 0.

i.e.

void fun1(int32_t *x, int n)
{
       for (int i = 0; i < (n & -16); i++)
         x[i] = (-x[i]) >> 31;
}

Notwithstanding that I think shifting a negative value right is
unspecified behaviour, I don't think this generates the same result
when x[i] is INT_MIN either, although negating that is also
unspecified since it can't be represented in an int.


You're right that they are implementation defined, but I think most
ISAs do have a sane Implementation of these two cases. At least both
x86 and AArch64 just replicate the signbit and for negate do two
complement negation. So INT_MIN works as expected and results in 0.

Which is not what the original code produces if you have wrapping ints,
because -INT_MIN is INT_MIN, and thus still negative.


True, but then you have a signed overflow which is undefined behaviour and not 
implementation defined

" If an exceptional condition occurs during the evaluation of an expression (that 
is, if the result is not mathematically defined or not in the range of representable 
values for its type), the behavior is undefined."

So it should still be acceptable to do in this case.

-fwrapv

R.


R.


But I'm happy to guard this behind some sort of target guard.

Regards,
Tamar

R.

now generates:

.L3:
           ldr     q0, [x0]
           cmgt    v0.4s, v0.4s, #0
           str     q0, [x0], 16
           cmp     x0, x1
           bne     .L3

instead of:

.L3:
           ldr     q0, [x0]
           neg     v0.4s, v0.4s
           sshr    v0.4s, v0.4s, 31
           str     q0, [x0], 16
           cmp     x0, x1
           bne     .L3

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

        * gcc.dg/signbit-2.c: New test.
        * gcc.dg/signbit-3.c: New test.
        * gcc.target/aarch64/signbit-1.c: New test.

--- inline copy of patch --
diff --git a/gcc/match.pd b/gcc/match.pd index

7d2a24dbc5e9644a09968f877e12a824d8ba1caa..581436fe36dbacdcb0c2720b7
190c96d14398143 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -826,6 +826,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        { tree utype = unsigned_type_for (type); }
        (convert (rshift (lshift (convert:utype @0) @2) @3))))))

+/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)  (simplify
+  (rshift (negate:s @0) cst@1)
+   (with { tree ctype = TREE_TYPE (@0);
+          tree stype = TREE_TYPE (@1);
+          tree bt = truth_type_for (ctype); }
+    (switch
+     /* Handle scalar case.  */
+     (if (INTEGRAL_TYPE_P (ctype)
+         && !VECTOR_TYPE_P (ctype)
+         && !TYPE_UNSIGNED (ctype)
+         && canonicalize_math_after_vectorization_p ()
+         && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+      (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
+     /* Handle vector case with a scalar immediate.  */
+     (if (VECTOR_INTEGER_TYPE_P (ctype)
+         && !VECTOR_TYPE_P (stype)
+         && !TYPE_UNSIGNED (ctype)
+          && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+      (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))
+     /* Handle vector case with a vector immediate.   */
+     (if (VECTOR_INTEGER_TYPE_P (ctype)
+         && VECTOR_TYPE_P (stype)
+         && !TYPE_UNSIGNED (ctype)
+         && uniform_vector_p (@1))
+      (with { tree cst = vector_cst_elt (@1, 0);
+             tree t = TREE_TYPE (cst); }
+       (if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1))
+        (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))))))))
+
    /* Fold (C1/X)*C2 into (C1*C2)/X.  */
    (simplify
     (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2) diff --git
a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-
2.c
new file mode 100644
index

0000000000000000000000000000000000000000..fc0157cbc5c7996b481f2998bc
30176c96a669bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1
+optimized } }
*/
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-3.c
b/gcc/testsuite/gcc.dg/signbit-
3.c
new file mode 100644
index

0000000000000000000000000000000000000000..19e9c06c349b3287610f817628
f00938ece60bf7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-3.c
@@ -0,0 +1,13 @@
+/* { dg-do assemble } */
+/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c
b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
new file mode 100644
index

0000000000000000000000000000000000000000..3ebfb0586f37de29cf58635b27
fe48503714447e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-assembler-times {\tcmgt\t} 1 } } */


Reply via email to