Fixed the problems and committed to trunk. Thanks, Di Zhao
> -----Original Message----- > From: Richard Biener <richard.guent...@gmail.com> > Sent: Friday, May 10, 2024 8:56 PM > To: Di Zhao OS <diz...@os.amperecomputing.com> > Cc: gcc-patches@gcc.gnu.org > Subject: Re: [PATCH] tree-optimization/114760 - check variants of >> and << in > loop-niter > > On Fri, May 10, 2024 at 12:55 PM Di Zhao OS > <diz...@os.amperecomputing.com> wrote: > > > > This patch tries to fix pr114760 by checking for the > > variants explicitly. When recognizing bit counting idiom, > > include pattern "x * 2" for "x << 1", and "x / 2" for > > "x >> 1" (given x is unsigned). > > > > Bootstrapped and tested on x86_64-linux-gnu. > > > > Thanks, > > Di Zhao > > > > --- > > > > gcc/ChangeLog: > > PR tree-optimization/114760 > > * tree-ssa-loop-niter.cc (is_lshift_by_1): New function > > to check if STMT is equivalent to x << 1. > > (is_rshift_by_1): New function to check if STMT is > > equivalent to x >> 1. > > (number_of_iterations_cltz): Enhance the identification > > of logical shift by one. > > (number_of_iterations_cltz_complement): Enhance the > > identification of logical shift by one. > > > > gcc/testsuite/ChangeLog: > > PR tree-optimization/114760 > > * gcc.dg/tree-ssa/pr114760-1.c: New test. > > * gcc.dg/tree-ssa/pr114760-2.c: New test. > > --- > > gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c | 69 ++++++++++++++++++++++ > > gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c | 20 +++++++ > > gcc/tree-ssa-loop-niter.cc | 56 +++++++++++++----- > > 3 files changed, 131 insertions(+), 14 deletions(-) > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c > > new file mode 100644 > > index 00000000000..9f10ccc3b51 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c > > @@ -0,0 +1,69 @@ > > +/* PR tree-optimization/114760 */ > > +/* { dg-do compile } */ > > +/* { dg-require-effective-target clz } */ > > +/* { dg-require-effective-target ctz } */ > > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > > + > > +unsigned > > +ntz32_1 (unsigned x) > > +{ > > + int n = 32; > > + while (x != 0) > > + { > > + n = n - 1; > > + x = x * 2; > > + } > > + return n; > > +} > > + > > +unsigned > > +ntz32_2 (unsigned x) > > +{ > > + int n = 32; > > + while (x != 0) > > + { > > + n = n - 1; > > + x = x + x; > > + } > > + return n; > > +} > > + > > +unsigned > > +ntz32_3 (unsigned x) > > +{ > > + int n = 32; > > + while (x != 0) > > + { > > + n = n - 1; > > + x = x << 1; > > + } > > + return n; > > +} > > + > > +#define PREC (__CHAR_BIT__ * __SIZEOF_INT__) > > +int > > +nlz32_1 (unsigned int b) { > > + int c = PREC; > > + > > + while (b != 0) { > > + b >>= 1; > > + c --; > > + } > > + > > + return c; > > +} > > + > > +int > > +nlz32_2 (unsigned int b) { > > + int c = PREC; > > + > > + while (b != 0) { > > + b /= 2; > > + c --; > > + } > > + > > + return c; > > +} > > + > > +/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 3 > "optimized" } } */ > > +/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 2 > "optimized" } } */ > > \ No newline at end of file > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c > > new file mode 100644 > > index 00000000000..e1b4c4b1338 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c > > @@ -0,0 +1,20 @@ > > +/* PR tree-optimization/114760 */ > > +/* { dg-do compile } */ > > +/* { dg-require-effective-target clz } */ > > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > > + > > +// Check that for signed type, there's no CLZ. > > +#define PREC (__CHAR_BIT__ * __SIZEOF_INT__) > > +int > > +no_nlz32 (int b) { > > + int c = PREC; > > + > > + while (b != 0) { > > + b /= 2; > > + c --; > > + } > > + > > + return c; > > +} > > + > > +/* { dg-final { scan-tree-dump-not "__builtin_ctz|\\.CLZ" "optimized" } } > */ > > \ No newline at end of file > > diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc > > index 0fde07e626f..1d99264949b 100644 > > --- a/gcc/tree-ssa-loop-niter.cc > > +++ b/gcc/tree-ssa-loop-niter.cc > > @@ -2303,6 +2303,38 @@ build_cltz_expr (tree src, bool leading, bool > define_at_zero) > > return call; > > } > > > > +/* Returns true if STMT is equivalent to x << 1. */ > > + > > +static bool > > +is_lshift_by_1 (gimple *stmt) > > You are checking for gimple-assign before calling these so please > use a 'gassign *' typed argument. > > > +{ > > + if (gimple_assign_rhs_code (stmt) == LSHIFT_EXPR > > + && integer_onep (gimple_assign_rhs2 (stmt))) > > + return true; > > + if (gimple_assign_rhs_code (stmt) == MULT_EXPR > > + && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST > > + && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2) > > You need to check for tree_fits_shwi_p (wich also checks for INTEGER_CST) > before using tree_to_shwi. > > Ok with this change (to both functions). > > Thanks, > Richard. > > > + return true; > > + return false; > > +} > > + > > +/* Returns true if STMT is equivalent to x >> 1. */ > > + > > +static bool > > +is_rshift_by_1 (gimple *stmt) > > +{ > > + if (!TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (stmt)))) > > + return false; > > + if (gimple_assign_rhs_code (stmt) == RSHIFT_EXPR > > + && integer_onep (gimple_assign_rhs2 (stmt))) > > + return true; > > + if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR > > + && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST > > + && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2) > > + return true; > > + return false; > > +} > > + > > /* See comment below for number_of_iterations_bitcount. > > For c[lt]z, we have: > > > > @@ -2400,14 +2432,12 @@ number_of_iterations_cltz (loop_p loop, edge exit, > > > > /* Make sure iv_2_stmt is a logical shift by one stmt: > > iv_2 = iv_1 {<<|>>} 1 */ > > - if (!is_gimple_assign (iv_2_stmt) > > - || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR > > - && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR > > - || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs > > (iv_2_stmt))))) > > - || !integer_onep (gimple_assign_rhs2 (iv_2_stmt))) > > + if (!is_gimple_assign (iv_2_stmt)) > > + return false; > > + bool left_shift = false; > > + if (!((left_shift = is_lshift_by_1 (iv_2_stmt)) > > + || is_rshift_by_1 (iv_2_stmt))) > > return false; > > - > > - bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR); > > > > tree iv_1 = gimple_assign_rhs1 (iv_2_stmt); > > > > @@ -2516,14 +2546,12 @@ number_of_iterations_cltz_complement (loop_p loop, > edge exit, > > > > /* Make sure iv_2_stmt is a logical shift by one stmt: > > iv_2 = iv_1 {>>|<<} 1 */ > > - if (!is_gimple_assign (iv_2_stmt) > > - || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR > > - && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR > > - || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs > > (iv_2_stmt))))) > > - || !integer_onep (gimple_assign_rhs2 (iv_2_stmt))) > > + if (!is_gimple_assign (iv_2_stmt)) > > + return false; > > + bool left_shift = false; > > + if (!((left_shift = is_lshift_by_1 (iv_2_stmt)) > > + || is_rshift_by_1 (iv_2_stmt))) > > return false; > > - > > - bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR); > > > > tree iv_1 = gimple_assign_rhs1 (iv_2_stmt); > > > > -- > > 2.25.1 > > > > > >