On September 10, 2019 9:45:28 AM GMT+02:00, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >The following patch optimizes (A / (cast) (1 << B)) -> (A >> B) >in addition to already supported (A / (1 << B)) -> (A >> B). > >The patch only supports same precision cast (i.e. a sign change), >or widening cast, in that case either zero extension (then the extended >value is known to be a power of two and all is fine), or sign extension >if the first operand has all upper bits starting with the sign bit of >the >narrower type clear. That is because (unsigned long long) (1 << 31) >is 0xffffffff80000000ULL and we need to make sure that dividing by that >huge value is equal to >> 31. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok. Richard. >2019-09-10 Jakub Jelinek <ja...@redhat.com> > > PR middle-end/91680 > * match.pd ((A / (1 << B)) -> (A >> B)): Allow widening cast from > the shift type to type. > > * gcc.dg/tree-ssa/pr91680.c: New test. > * g++.dg/torture/pr91680.C: New test. > >--- gcc/match.pd.jj 2019-09-09 17:33:08.551582878 +0200 >+++ gcc/match.pd 2019-09-09 20:05:25.966107441 +0200 >@@ -305,13 +305,29 @@ (define_operator_list COND_TERNARY > /* (A / (1 << B)) -> (A >> B). > Only for unsigned A. For signed A, this would not preserve rounding > toward zero. >- For example: (-1 / ( 1 << B)) != -1 >> B. */ >+ For example: (-1 / ( 1 << B)) != -1 >> B. >+ Also also widening conversions, like: >+ (A / (unsigned long long) (1U << B)) -> (A >> B) >+ or >+ (A / (unsigned long long) (1 << B)) -> (A >> B). >+ If the left shift is signed, it can be done only if the upper bits >+ of A starting from shift's type sign bit are zero, as >+ (unsigned long long) (1 << 31) is -2147483648ULL, not >2147483648ULL, >+ so it is valid only if A >> 31 is zero. */ > (simplify >- (trunc_div @0 (lshift integer_onep@1 @2)) >+ (trunc_div @0 (convert? (lshift integer_onep@1 @2))) > (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0)) > && (!VECTOR_TYPE_P (type) > || target_supports_op_p (type, RSHIFT_EXPR, optab_vector) >- || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar))) >+ || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)) >+ && (useless_type_conversion_p (type, TREE_TYPE (@1)) >+ || (element_precision (type) >= element_precision (TREE_TYPE (@1)) >+ && (TYPE_UNSIGNED (TREE_TYPE (@1)) >+ || (element_precision (type) >+ == element_precision (TREE_TYPE (@1))) >+ || (get_nonzero_bits (@0) >+ & wi::mask (element_precision (TREE_TYPE (@1)) - 1, true, >+ element_precision (type))) == 0)))) > (rshift @0 @2))) > > /* Preserve explicit divisions by 0: the C++ front-end wants to detect >--- gcc/testsuite/gcc.dg/tree-ssa/pr91680.c.jj 2019-09-09 >20:18:04.349619827 +0200 >+++ gcc/testsuite/gcc.dg/tree-ssa/pr91680.c 2019-09-09 >20:18:33.922171856 +0200 >@@ -0,0 +1,37 @@ >+/* PR middle-end/91680 */ >+/* { dg-do compile { target { ilp32 || lp64 } } } */ >+/* { dg-options "-O2 -fdump-tree-forwprop1" } */ >+/* { dg-final { scan-tree-dump-times " / " 1 "forwprop1" } } */ >+/* { dg-final { scan-tree-dump-times " >> " 3 "forwprop1" } } */ >+ >+__attribute__((noipa)) unsigned long long >+foo (unsigned char x) >+{ >+ unsigned long long q = 1 << x; >+ return 256 / q; >+} >+ >+__attribute__((noipa)) unsigned long long >+bar (unsigned char x) >+{ >+ unsigned long long q = 1U << x; >+ return 256 / q; >+} >+ >+__attribute__((noipa)) unsigned long long >+baz (unsigned char x, unsigned long long y) >+{ >+ /* This can't be optimized, at least not in C++ and maybe not >+ in C89, because for x 31 q is -2147483648ULL, not >+ 2147483648ULL, and e.g. 2147483648ULL >> 31 is 1, while >+ 2147483648ULL / -2147483648ULL is 0. */ >+ unsigned long long q = 1 << x; >+ return y / q; >+} >+ >+__attribute__((noipa)) unsigned long long >+qux (unsigned char x, unsigned long long y) >+{ >+ unsigned long long q = 1U << x; >+ return y / q; >+} >--- gcc/testsuite/g++.dg/torture/pr91680.C.jj 2019-09-09 >20:25:51.775539166 +0200 >+++ gcc/testsuite/g++.dg/torture/pr91680.C 2019-09-09 >20:26:18.610132667 +0200 >@@ -0,0 +1,35 @@ >+/* PR middle-end/91680 */ >+/* { dg-do run { target { ilp32 || lp64 } } } */ >+ >+extern "C" void abort (); >+ >+#include "../../gcc.dg/tree-ssa/pr91680.c" >+ >+int >+main () >+{ >+ unsigned char i; >+ for (i = 0; i < __SIZEOF_INT__ * __CHAR_BIT__; i++) >+ { >+ volatile unsigned long long q = 1 << i; >+ if (foo (i) != 256 / q) >+ abort (); >+ q = 1U << i; >+ if (bar (i) != 256 / q) >+ abort (); >+ q = 1 << i; >+ if (baz (i, (1U << i) - 1) != ((1U << i) - 1) / q) >+ abort (); >+ if (baz (i, 1U << i) != (1U << i) / q) >+ abort (); >+ if (baz (i, -1) != -1 / q) >+ abort (); >+ q = 1U << i; >+ if (qux (i, (1U << i) - 1) != ((1U << i) - 1) / q) >+ abort (); >+ if (qux (i, 1U << i) != (1U << i) / q) >+ abort (); >+ if (qux (i, -1) != -1 / q) >+ abort (); >+ } >+} > > Jakub