On Mon, 11 Oct 2021, Tamar Christina wrote:

> Hi all,
> 
> Here's a new version of the patch.
> 
> > >>> " If an exceptional condition occurs during the evaluation of an
> > >>> expression
> > >> (that is, if the result is not mathematically defined or not in the
> > >> range of representable values for its type), the behavior is undefined."
> > >>>
> > >>> So it should still be acceptable to do in this case.
> > >>
> > >> -fwrapv
> > >
> > > If I understand correctly, you're happy with this if I guard it on
> > > !flag_wrapv?
> > 
> > I did some more digging.  Right shift of a negative value is IMP_DEF (not
> > UNDEF - this keeps catching me out).  So yes, wrapping this with !wrapv
> > would address my concern.
> > 
> > I've not reviewed the patch itself, though.  I've never even written a patch
> > for match.pd, so don't feel qualified to do that.
> 
> No problem, thanks for catching this! I'm sure one of the Richards will
> review it when they have a chance.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> x86_64-pc-linux-gnu and no regressions.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       * match.pd: New negate+shift pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.dg/signbit-2.c: New test.
>       * gcc.dg/signbit-3.c: New test.
>       * gcc.target/aarch64/signbit-1.c: New test.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..3d48eda826f889483a83267409c3f278ee907b57 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -826,6 +826,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>      { tree utype = unsigned_type_for (type); }
>      (convert (rshift (lshift (convert:utype @0) @2) @3))))))
>  
> +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
> +(for cst (INTEGER_CST VECTOR_CST)
> + (simplify
> +  (rshift (negate:s @0) cst@1)
> +   (if (!flag_wrapv)

Don't test flag_wrapv directly, instead use the appropriate
TYPE_OVERFLOW_{UNDEFINED,WRAPS} predicates.  But I'm not sure
what we are protecting against?  Right-shift of signed integers
is implementation-defined and GCC treats it as you'd expect,
sign-extending the result.

> +    (with { tree ctype = TREE_TYPE (@0);
> +         tree stype = TREE_TYPE (@1);
> +         tree bt = truth_type_for (ctype); }
> +     (switch
> +      /* Handle scalar case.  */
> +      (if (INTEGRAL_TYPE_P (ctype)
> +        && !VECTOR_TYPE_P (ctype)
> +        && !TYPE_UNSIGNED (ctype)
> +        && canonicalize_math_after_vectorization_p ()
> +        && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
> +       (convert:bt (gt:bt @0 { build_zero_cst (stype); })))

I'm not sure why the result is of type 'bt' rather than the
original type of the expression?

In that regard, for non-vectors we'd have to add the sign
extension from unsigned bool; in the vector case we'd
hope the type of the comparison is correct.  I think in
both cases it might be convenient to use

  (cond (gt:bt @0 { build_zero_cst (ctype); })
        { build_all_ones_cst (ctype); } { build_zero_cst (ctype); })

to compute the correct result and rely on (cond ..) simplifications
to simplify that if possible.

Btw, 'stype' should be irrelevant - you need to look at
the precision of 'ctype', no?

Richard.

> +      /* Handle vector case with a scalar immediate.  */
> +      (if (VECTOR_INTEGER_TYPE_P (ctype)
> +        && !VECTOR_TYPE_P (stype)
> +        && !TYPE_UNSIGNED (ctype)
> +           && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
> +       (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))
> +      /* Handle vector case with a vector immediate.   */
> +      (if (VECTOR_INTEGER_TYPE_P (ctype)
> +        && VECTOR_TYPE_P (stype)
> +        && !TYPE_UNSIGNED (ctype)
> +        && uniform_vector_p (@1))
> +       (with { tree cst = vector_cst_elt (@1, 0);
> +            tree t = TREE_TYPE (cst); }
> +        (if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1))
> +         (convert:bt (gt:bt @0 { build_zero_cst (ctype); }))))))))))
> +
>  /* Fold (C1/X)*C2 into (C1*C2)/X.  */
>  (simplify
>   (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
> diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..fc0157cbc5c7996b481f2998bc30176c96a669bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
> +
> +#include <stdint.h>
> +
> +void fun1(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +void fun2(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (-x[i]) >> 30;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */
> +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
> diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..19e9c06c349b3287610f817628f00938ece60bf7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
> +
> +#include <stdint.h>
> +
> +void fun1(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
> +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..3ebfb0586f37de29cf58635b27fe48503714447e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O3 --save-temps" } */
> +
> +#include <stdint.h>
> +
> +void fun1(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +void fun2(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (-x[i]) >> 30;
> +}
> +
> +/* { dg-final { scan-assembler-times {\tcmgt\t} 1 } } */
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to