> -----Original Message----- > From: Richard Earnshaw <richard.earns...@foss.arm.com> > Sent: Tuesday, October 5, 2021 1:56 PM > To: Tamar Christina <tamar.christ...@arm.com>; gcc-patches@gcc.gnu.org > Cc: nd <n...@arm.com>; rguent...@suse.de > Subject: Re: [PATCH]middle-end convert negate + right shift into compare > greater. > > > > On 05/10/2021 13:50, Tamar Christina via Gcc-patches wrote: > > Hi All, > > > > This turns an inversion of the sign bit + arithmetic right shift into > > a comparison with 0. > > > > i.e. > > > > void fun1(int32_t *x, int n) > > { > > for (int i = 0; i < (n & -16); i++) > > x[i] = (-x[i]) >> 31; > > } > > > Notwithstanding that I think shifting a negative value right is unspecified > behaviour, I don't think this generates the same result when x[i] is INT_MIN > either, although negating that is also unspecified since it can't be > represented in an int. >
You're right that they are implementation defined, but I think most ISAs do have a sane implementation of these two cases. At least both x86 and AArch64 just replicate the signbit and for negate do two's complement negation. So INT_MIN works as expected and results in 0. But I'm happy to guard this behind some sort of target guard. Regards, Tamar > R. > > > now generates: > > > > .L3: > > ldr q0, [x0] > > cmgt v0.4s, v0.4s, #0 > > str q0, [x0], 16 > > cmp x0, x1 > > bne .L3 > > > > instead of: > > > > .L3: > > ldr q0, [x0] > > neg v0.4s, v0.4s > > sshr v0.4s, v0.4s, 31 > > str q0, [x0], 16 > > cmp x0, x1 > > bne .L3 > > > > Bootstrapped Regtested on aarch64-none-linux-gnu, > > x86_64-pc-linux-gnu and no regressions. > > > > Ok for master? > > > > Thanks, > > Tamar > > > > gcc/ChangeLog: > > > > * match.pd: New negate+shift pattern. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/signbit-2.c: New test. > > * gcc.dg/signbit-3.c: New test. > > * gcc.target/aarch64/signbit-1.c: New test. > > > > --- inline copy of patch -- > > diff --git a/gcc/match.pd b/gcc/match.pd > > index > 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..581436fe36dbacdcb0c2720b7 > 190c96d14398143 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -826,6 +826,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > { tree utype = unsigned_type_for (type); } > > (convert (rshift (lshift (convert:utype @0) @2) @3)))))) > > > > +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1. */ > > +(for cst (INTEGER_CST VECTOR_CST) > > + (simplify > > + (rshift (negate:s @0) cst@1) > > + (with { tree ctype = TREE_TYPE (@0); > > + tree stype = TREE_TYPE (@1); > > + tree bt = truth_type_for (ctype); } > > + (switch > > + /* Handle scalar case. 
*/ > > + (if (INTEGRAL_TYPE_P (ctype) > > + && !VECTOR_TYPE_P (ctype) > > + && !TYPE_UNSIGNED (ctype) > > + && canonicalize_math_after_vectorization_p () > > + && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1)) > > + (convert:bt (gt:bt @0 { build_zero_cst (stype); }))) > > + /* Handle vector case with a scalar immediate. */ > > + (if (VECTOR_INTEGER_TYPE_P (ctype) > > + && !VECTOR_TYPE_P (stype) > > + && !TYPE_UNSIGNED (ctype) > > + && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1)) > > + (convert:bt (gt:bt @0 { build_zero_cst (ctype); }))) > > + /* Handle vector case with a vector immediate. */ > > + (if (VECTOR_INTEGER_TYPE_P (ctype) > > + && VECTOR_TYPE_P (stype) > > + && !TYPE_UNSIGNED (ctype) > > + && uniform_vector_p (@1)) > > + (with { tree cst = vector_cst_elt (@1, 0); > > + tree t = TREE_TYPE (cst); } > > + (if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1)) > > + (convert:bt (gt:bt @0 { build_zero_cst (ctype); }))))))))) > > + > > /* Fold (C1/X)*C2 into (C1*C2)/X. 
*/ > > (simplify > > (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2) > > diff --git a/gcc/testsuite/gcc.dg/signbit-2.c > > b/gcc/testsuite/gcc.dg/signbit- > 2.c > > new file mode 100644 > > index > 0000000000000000000000000000000000000000..fc0157cbc5c7996b481f2998bc > 30176c96a669bb > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/signbit-2.c > > @@ -0,0 +1,19 @@ > > +/* { dg-do assemble } */ > > +/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */ > > + > > +#include <stdint.h> > > + > > +void fun1(int32_t *x, int n) > > +{ > > + for (int i = 0; i < (n & -16); i++) > > + x[i] = (-x[i]) >> 31; > > +} > > + > > +void fun2(int32_t *x, int n) > > +{ > > + for (int i = 0; i < (n & -16); i++) > > + x[i] = (-x[i]) >> 30; > > +} > > + > > +/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized > > } } > */ > > +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */ > > diff --git a/gcc/testsuite/gcc.dg/signbit-3.c > > b/gcc/testsuite/gcc.dg/signbit- > 3.c > > new file mode 100644 > > index > 0000000000000000000000000000000000000000..19e9c06c349b3287610f817628 > f00938ece60bf7 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/signbit-3.c > > @@ -0,0 +1,13 @@ > > +/* { dg-do assemble } */ > > +/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */ > > + > > +#include <stdint.h> > > + > > +void fun1(int32_t *x, int n) > > +{ > > + for (int i = 0; i < (n & -16); i++) > > + x[i] = (-x[i]) >> 31; > > +} > > + > > +/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */ > > +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */ > > diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c > b/gcc/testsuite/gcc.target/aarch64/signbit-1.c > > new file mode 100644 > > index > 0000000000000000000000000000000000000000..3ebfb0586f37de29cf58635b27 > fe48503714447e > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c > > @@ -0,0 +1,18 @@ > > +/* { dg-do assemble } */ > > +/* { dg-options "-O3 --save-temps" 
} */ > > + > > +#include <stdint.h> > > + > > +void fun1(int32_t *x, int n) > > +{ > > + for (int i = 0; i < (n & -16); i++) > > + x[i] = (-x[i]) >> 31; > > +} > > + > > +void fun2(int32_t *x, int n) > > +{ > > + for (int i = 0; i < (n & -16); i++) > > + x[i] = (-x[i]) >> 30; > > +} > > + > > +/* { dg-final { scan-assembler-times {\tcmgt\t} 1 } } */ > > > >