On Wed, 3 Nov 2021, Tamar Christina wrote: > Hi, > > I have addressed all the feedback and updated patch attached: > > Bootstrapped Regtested on aarch64-none-linux-gnu, > x86_64-pc-linux-gnu and no regressions. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * match.pd: New negate+shift pattern. > > gcc/testsuite/ChangeLog: > > * gcc.dg/signbit-2.c: New test. > * gcc.dg/signbit-3.c: New test. > * gcc.dg/signbit-4.c: New test. > * gcc.dg/signbit-5.c: New test. > * gcc.dg/signbit-6.c: New test. > * gcc.target/aarch64/signbit-1.c: New test. > > --- inline copy of patch --- > > diff --git a/gcc/match.pd b/gcc/match.pd > index > 65a6591f75c03333602147bbdf6d59f9ccd4b1e5..fe93500d22e2388889c8c9faf4c58cee95dec7f9 > 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -38,7 +38,8 @@ along with GCC; see the file COPYING3. If not see > uniform_integer_cst_p > HONOR_NANS > uniform_vector_p > - bitmask_inv_cst_vector_p) > + bitmask_inv_cst_vector_p > + expand_vec_cmp_expr_p) > > /* Operator lists. */ > (define_operator_list tcc_comparison > @@ -832,6 +833,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > { tree utype = unsigned_type_for (type); } > (convert (rshift (lshift (convert:utype @0) @2) @3)))))) > > +/* Fold (-x >> C) into -(x > 0) where C = precision(type) - 1. */ > +(for cst (INTEGER_CST VECTOR_CST) > + (simplify > + (rshift (negate:s @0) cst@1) > + (if (!TYPE_UNSIGNED (type) > + && TYPE_OVERFLOW_UNDEFINED (type)) > + (with { tree stype = TREE_TYPE (@1); > + tree bt = truth_type_for (type); > + tree zeros = build_zero_cst (type); } > + (switch > + /* Handle scalar case. */ > + (if (INTEGRAL_TYPE_P (type) > + /* If we apply the rule to the scalar type before vectorization > + we will enforce the result of the comparison being a bool > + which will require an extra AND on the result that will be > + indistinguishable from when the user did actually want 0 > + or 1 as the result so it can't be removed. 
*/ > + && canonicalize_math_after_vectorization_p () > + && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (type) - 1)) > + (negate (convert (gt @0 { zeros; })))) > + /* Handle vector case. */ > + (if (VECTOR_INTEGER_TYPE_P (type) > + /* First check whether the target has the same mode for vector > + comparison results as it's operands do. */ > + && TYPE_MODE (bt) == TYPE_MODE (type) > + /* Then check to see if the target is able to expand the comparison > + with the given type later on, otherwise we may ICE. */ > + && expand_vec_cmp_expr_p (type, bt, { GT_EXPR }))
No need to wrap GT_EXPR in { } > + (with { tree cst = uniform_integer_cst_p (@1); } If you declare 'cst' above, where you declare 'bt', you can do && (cst = uniform_integer_cst_p (@1))) combining it with the if above and the one below, simplifying indents and flow. OK with that change. I guess it might happen that the scalar transform expands badly on some targets? Please keep an eye on problems that come up. Thanks, Richard. > + (if (cst && wi::eq_p (wi::to_wide (cst), element_precision (type) - 1)) > + (view_convert (gt:bt @0 { zeros; })))))))))) > + > /* Fold (C1/X)*C2 into (C1*C2)/X. */ > (simplify > (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2) > diff --git a/gcc/testsuite/gcc.dg/signbit-2.c > b/gcc/testsuite/gcc.dg/signbit-2.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..fc0157cbc5c7996b481f2998bc30176c96a669bb > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/signbit-2.c > @@ -0,0 +1,19 @@ > +/* { dg-do assemble } */ > +/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */ > + > +#include <stdint.h> > + > +void fun1(int32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (-x[i]) >> 31; > +} > + > +void fun2(int32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (-x[i]) >> 30; > +} > + > +/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } > } */ > +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */ > diff --git a/gcc/testsuite/gcc.dg/signbit-3.c > b/gcc/testsuite/gcc.dg/signbit-3.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..19e9c06c349b3287610f817628f00938ece60bf7 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/signbit-3.c > @@ -0,0 +1,13 @@ > +/* { dg-do assemble } */ > +/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */ > + > +#include <stdint.h> > + > +void fun1(int32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (-x[i]) >> 31; > +} > + > +/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 
optimized } } */ > +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */ > diff --git a/gcc/testsuite/gcc.dg/signbit-4.c > b/gcc/testsuite/gcc.dg/signbit-4.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..bc459ba60a760bdf49e94dbec762f378c24fe9b5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/signbit-4.c > @@ -0,0 +1,65 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1 -fwrapv" } */ > + > +#include <stdint.h> > +#include <limits.h> > +#include <stdio.h> > + > +#ifndef N > +#define N 65 > +#endif > + > +#ifndef TYPE > +#define TYPE int32_t > +#endif > + > +#ifndef DEBUG > +#define DEBUG 1 > +#endif > + > +#define BASE ((TYPE) -1 < 0 ? -126 : 4) > + > +__attribute__ ((noinline, noipa)) > +void fun1(TYPE *x, int n) > +{ > + for (int i = 0; i < n; i++) > + x[i] = (-x[i]) >> 31; > +} > + > +__attribute__ ((noinline, noipa, optimize("O0"))) > +void fun2(TYPE *x, int n) > +{ > + for (int i = 0; i < n; i++) > + x[i] = (-x[i]) >> 31; > +} > + > +int main () > +{ > + TYPE a[N]; > + TYPE b[N]; > + > + a[0] = INT_MIN; > + b[0] = INT_MIN; > + > + for (int i = 1; i < N; ++i) > + { > + a[i] = BASE + i * 13; > + b[i] = BASE + i * 13; > + if (DEBUG) > + printf ("%d: 0x%x\n", i, a[i]); > + } > + > + fun1 (a, N); > + fun2 (b, N); > + > + for (int i = 0; i < N; ++i) > + { > + if (DEBUG) > + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); > + > + if (a[i] != b[i]) > + __builtin_abort (); > + } > + return 0; > +} > + > diff --git a/gcc/testsuite/gcc.dg/signbit-5.c > b/gcc/testsuite/gcc.dg/signbit-5.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..22a92704773e3282759524b74d35196a477d43dd > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/signbit-5.c > @@ -0,0 +1,65 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3" } */ > + > +#include <stdint.h> > +#include <limits.h> > +#include <stdio.h> > + > +#ifndef N > +#define N 65 > +#endif > + > +#ifndef TYPE > +#define TYPE int32_t > +#endif > + > +#ifndef DEBUG > +#define DEBUG 1 > 
+#endif > + > +#define BASE ((TYPE) -1 < 0 ? -126 : 4) > + > +__attribute__ ((noinline, noipa)) > +void fun1(TYPE *x, int n) > +{ > + for (int i = 0; i < n; i++) > + x[i] = (-x[i]) >> 31; > +} > + > +__attribute__ ((noinline, noipa, optimize("O1"))) > +void fun2(TYPE *x, int n) > +{ > + for (int i = 0; i < n; i++) > + x[i] = (-x[i]) >> 31; > +} > + > +int main () > +{ > + TYPE a[N]; > + TYPE b[N]; > + > + a[0] = INT_MIN; > + b[0] = INT_MIN; > + > + for (int i = 1; i < N; ++i) > + { > + a[i] = BASE + i * 13; > + b[i] = BASE + i * 13; > + if (DEBUG) > + printf ("%d: 0x%x\n", i, a[i]); > + } > + > + fun1 (a, N); > + fun2 (b, N); > + > + for (int i = 0; i < N; ++i) > + { > + if (DEBUG) > + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); > + > + if (a[i] != b[i]) > + __builtin_abort (); > + } > + return 0; > +} > + > diff --git a/gcc/testsuite/gcc.dg/signbit-6.c > b/gcc/testsuite/gcc.dg/signbit-6.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..da186624cfa057dfc3780c8af4f6b1335ba07e7e > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/signbit-6.c > @@ -0,0 +1,72 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1" } */ > + > +#include <stdint.h> > +#include <limits.h> > +#include <stdio.h> > + > +#ifndef N > +#define N 65 > +#endif > + > +#ifndef TYPE > +#define TYPE int32_t > +#endif > + > +#ifndef DEBUG > +#define DEBUG 1 > +#endif > + > +#define BASE ((TYPE) -1 < 0 ? 
-126 : 4) > + > +__attribute__ ((noinline, noipa)) > +void fun1(TYPE *x, int n) > +{ > + for (int i = 0; i < n; i++) > + x[i] = (-x[i]) >> 31; > +} > + > +__attribute__ ((noinline, noipa, optimize("O0"))) > +void fun2(TYPE *x, int n) > +{ > + for (int i = 0; i < n; i++) > + x[i] = (-x[i]) >> 31; > +} > + > +int main () > +{ > + TYPE a[N]; > + TYPE b[N]; > + > + a[0] = INT_MIN; > + b[0] = INT_MIN; > + > + for (int i = 1; i < N; ++i) > + { > + a[i] = BASE + i * 13; > + b[i] = BASE + i * 13; > + if (DEBUG) > + printf ("%d: 0x%x\n", i, a[i]); > + } > + > + fun1 (a, N); > + fun2 (b, N); > + > + if (DEBUG) > + printf ("%d = 0x%x == 0x%x\n", 0, a[0], b[0]); > + > + if (a[0] != 0x0 || b[0] != -1) > + __builtin_abort (); > + > + > + for (int i = 1; i < N; ++i) > + { > + if (DEBUG) > + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); > + > + if (a[i] != b[i]) > + __builtin_abort (); > + } > + return 0; > +} > + > diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c > b/gcc/testsuite/gcc.target/aarch64/signbit-1.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..3ebfb0586f37de29cf58635b27fe48503714447e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c > @@ -0,0 +1,18 @@ > +/* { dg-do assemble } */ > +/* { dg-options "-O3 --save-temps" } */ > + > +#include <stdint.h> > + > +void fun1(int32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (-x[i]) >> 31; > +} > + > +void fun2(int32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (-x[i]) >> 30; > +} > + > +/* { dg-final { scan-assembler-times {\tcmgt\t} 1 } } */ > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)