On 05/10/2021 14:30, Tamar Christina wrote:


-----Original Message-----
From: Richard Earnshaw <richard.earns...@foss.arm.com>
Sent: Tuesday, October 5, 2021 1:56 PM
To: Tamar Christina <tamar.christ...@arm.com>; gcc-patches@gcc.gnu.org
Cc: nd <n...@arm.com>; rguent...@suse.de
Subject: Re: [PATCH]middle-end convert negate + right shift into compare
greater.



On 05/10/2021 13:50, Tamar Christina via Gcc-patches wrote:
Hi All,

This turns a negation followed by an arithmetic right shift by
precision-1 into a comparison with 0.

i.e.

void fun1(int32_t *x, int n)
{
      for (int i = 0; i < (n & -16); i++)
        x[i] = (-x[i]) >> 31;
}

Notwithstanding that shifting a negative value right is implementation-defined
behaviour, I don't think this generates the same result when x[i] is INT_MIN
either, although negating that is undefined behaviour, since the result can't
be represented in an int.


You're right that these are implementation-defined (and, for INT_MIN,
undefined) in ISO C, but I think most ISAs implement both cases sanely: at
least x86 and AArch64 replicate the sign bit on an arithmetic right shift and
use two's complement negation. So INT_MIN works as expected and results in 0.

Which is not what the original code produces if you have wrapping ints, because -INT_MIN is INT_MIN, and thus still negative.
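
Concretely (a sketch assuming -fwrapv semantics and a sign-replicating
shift, neither of which ISO C guarantees; not part of the patch):

#include <stdint.h>
#include <stdio.h>

int main (void)
{
  int32_t x = INT32_MIN;
  int32_t shifted  = (-x) >> 31;  /* -x wraps back to INT32_MIN, still negative, so -1.  */
  int32_t compared = -(x > 0);    /* INT32_MIN > 0 is false, so 0.  */
  printf ("%d vs %d\n", shifted, compared);  /* prints -1 vs 0 */
  return 0;
}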

R.


But I'm happy to put this behind some sort of target guard.

Regards,
Tamar

R.

now generates:

.L3:
          ldr     q0, [x0]
          cmgt    v0.4s, v0.4s, #0
          str     q0, [x0], 16
          cmp     x0, x1
          bne     .L3

instead of:

.L3:
          ldr     q0, [x0]
          neg     v0.4s, v0.4s
          sshr    v0.4s, v0.4s, 31
          str     q0, [x0], 16
          cmp     x0, x1
          bne     .L3
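
For values whose negation is representable, the shift and the compare
agree: an arithmetic right shift by 31 replicates the sign bit, so
(-x) >> 31 is -1 exactly when x > 0 and 0 otherwise, i.e. -(x > 0).
A minimal scalar check of that equivalence (illustrative only, not part
of the patch, and assuming a sign-replicating shift as on x86 and
AArch64):

#include <stdint.h>
#include <assert.h>

int main (void)
{
  int32_t samples[] = { INT32_MIN + 1, -42, -1, 0, 1, 42, INT32_MAX };
  for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
    {
      int32_t x = samples[i];
      /* Right-shifting a negative value is implementation-defined in
         ISO C; this holds wherever the shift replicates the sign bit.  */
      assert (((-x) >> 31) == -(x > 0));
    }
  return 0;
}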

Bootstrapped and regtested on aarch64-none-linux-gnu and
x86_64-pc-linux-gnu with no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

        * gcc.dg/signbit-2.c: New test.
        * gcc.dg/signbit-3.c: New test.
        * gcc.target/aarch64/signbit-1.c: New test.
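
As a side note, the new tests can be exercised in isolation from the build
tree with something like the following (illustrative; the exact RUNTESTFLAGS
depend on the local setup):

make check-gcc RUNTESTFLAGS="dg.exp=signbit-*.c"
make check-gcc RUNTESTFLAGS="aarch64.exp=signbit-1.c"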

--- inline copy of patch --
diff --git a/gcc/match.pd b/gcc/match.pd
index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..581436fe36dbacdcb0c2720b7190c96d14398143 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -826,6 +826,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       { tree utype = unsigned_type_for (type); }
       (convert (rshift (lshift (convert:utype @0) @2) @3))))))

+/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)
+ (simplify
+  (rshift (negate:s @0) cst@1)
+   (with { tree ctype = TREE_TYPE (@0);
+          tree stype = TREE_TYPE (@1);
+          tree bt = truth_type_for (ctype); }
+    (switch
+     /* Handle scalar case.  */
+     (if (INTEGRAL_TYPE_P (ctype)
+         && !VECTOR_TYPE_P (ctype)
+         && !TYPE_UNSIGNED (ctype)
+         && canonicalize_math_after_vectorization_p ()
+         && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+      (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
+     /* Handle vector case with a scalar immediate.  */
+     (if (VECTOR_INTEGER_TYPE_P (ctype)
+         && !VECTOR_TYPE_P (stype)
+         && !TYPE_UNSIGNED (ctype)
+          && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+      (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))
+     /* Handle vector case with a vector immediate.   */
+     (if (VECTOR_INTEGER_TYPE_P (ctype)
+         && VECTOR_TYPE_P (stype)
+         && !TYPE_UNSIGNED (ctype)
+         && uniform_vector_p (@1))
+      (with { tree cst = vector_cst_elt (@1, 0);
+             tree t = TREE_TYPE (cst); }
+       (if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1))
+        (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))))))))
+
   /* Fold (C1/X)*C2 into (C1*C2)/X.  */
   (simplify
    (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..fc0157cbc5c7996b481f2998bc30176c96a669bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..19e9c06c349b3287610f817628f00938ece60bf7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-3.c
@@ -0,0 +1,13 @@
+/* { dg-do assemble } */
+/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..3ebfb0586f37de29cf58635b27fe48503714447e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-assembler-times {\tcmgt\t} 1 } } */

