On Wed, 29 Oct 2025, Robin Dapp wrote:

> Hi,
> 
> When niter runs after the copy-header pass it sometimes fails to
> simplify assumptions in a ctz loop.
> As the assumption is a simple nonzero test here we can have
> ranger get us the range of the shifted expression, then verify that
> this range is nonzero.
> 
> This helps recognize a ctz loop in 502.gcc's compute_transp.
> 
> Changes from v2/v3:
>  - Enable ranger in sccp.
>  - Don't use path-based ranger.
> 
> There is minor fallout in pr41488 where the final assembly is similar but the 
> expected scev simplification happens earlier than before.  I disabled sccp 
> for 
> that test.
> 
> Bootstrapped and regtested on x86 and power10.  Regtested on aarch64 and 
> rv64gcv_zvl512b.

LGTM, please leave Andrew a chance to comment on the ranger bits.

Thanks,
Richard.

> Regards
>  Robin
> 
>       PR/tree-optimization 122207
> 
> gcc/ChangeLog:
> 
>       * tree-ssa-loop-niter.cc (shifted_range_nonzero_p): New
>       function.
>       (number_of_iterations_cltz): Call new function.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.dg/tree-ssa/ctz-char.c: Remove -fno-tree-ch.
>       * gcc.dg/tree-ssa/ctz-complement-char.c: Ditto.
>       * gcc.dg/tree-ssa/ctz-complement-int.c: Ditto.
>       * gcc.dg/tree-ssa/ctz-complement-long-long.c: Ditto.
>       * gcc.dg/tree-ssa/ctz-complement-long.c: Ditto.
>       * gcc.dg/tree-ssa/ctz-int.c: Ditto.
>       * gcc.dg/tree-ssa/ctz-long-long.c: Ditto.
>       * gcc.dg/tree-ssa/ctz-long.c: Ditto.
>       * gcc.dg/tree-ssa/ctz-ch.c: New test.
>       * gcc.dg/pr41488.c: Add -fno-tree-scev-cprop.
> ---
>  gcc/testsuite/gcc.dg/pr41488.c                |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c        | 23 +++++
>  gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c      |  2 +-
>  .../gcc.dg/tree-ssa/ctz-complement-char.c     |  2 +-
>  .../gcc.dg/tree-ssa/ctz-complement-int.c      |  2 +-
>  .../tree-ssa/ctz-complement-long-long.c       |  2 +-
>  .../gcc.dg/tree-ssa/ctz-complement-long.c     |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c       |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c      |  2 +-
>  gcc/tree-ssa-loop-niter.cc                    | 93 ++++++++++++++++++-
>  gcc/tree-ssa-loop.cc                          |  5 +
>  12 files changed, 127 insertions(+), 12 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
> 
> diff --git a/gcc/testsuite/gcc.dg/pr41488.c b/gcc/testsuite/gcc.dg/pr41488.c
> index 1e4bf19c7da..a7ba3672efe 100644
> --- a/gcc/testsuite/gcc.dg/pr41488.c
> +++ b/gcc/testsuite/gcc.dg/pr41488.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-ivcanon-scev" } */
> +/* { dg-options "-O2 -fno-tree-scev-cprop -fdump-tree-ivcanon-scev" } */
>  
>  struct struct_t
>  {
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
> new file mode 100644
> index 00000000000..5d725979971
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +typedef unsigned long BITMAP_WORD;
> +
> +bool
> +bmp_iter_set (BITMAP_WORD bits, unsigned *bit_no)
> +{
> +  /* If our current word is nonzero, it contains the bit we want.  */
> +  if (bits)
> +    {
> +      while (!(bits & 1))
> +     {
> +       bits >>= 1;
> +       *bit_no += 1;
> +     }
> +      return true;
> +    }
> +
> +  return false;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } 
> } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c
> index 3cd166acbd4..fa8b7f39de4 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c
> @@ -1,6 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target ctz } */
> -/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
>  
>  #define PREC (__CHAR_BIT__)
>  
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-char.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-char.c
> index b9afe8852d8..5ebc3213169 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-char.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-char.c
> @@ -1,6 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target ctz } */
> -/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
>  
>  #define PREC (__CHAR_BIT__)
>  
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-int.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-int.c
> index d2702a65daf..0ce4b6beaa7 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-int.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-int.c
> @@ -1,6 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target ctz } */
> -/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
>  
>  #define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
>  
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long-long.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long-long.c
> index 1ea0d5d7d9f..f98bec039b3 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long-long.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long-long.c
> @@ -1,6 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target ctzll } */
> -/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
>  
>  #define PREC (__CHAR_BIT__ * __SIZEOF_LONG_LONG__)
>  
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long.c
> index 80fb02dcfa6..8edb3728131 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long.c
> @@ -1,6 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target ctzl } */
> -/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
>  
>  #define PREC (__CHAR_BIT__ * __SIZEOF_LONG__)
>  
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c
> index 7f63493eb73..2bf3ae69b93 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c
> @@ -1,6 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target ctz } */
> -/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
>  
>  #define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
>  
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c
> index 924f61b76f0..2e159485cb9 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c
> @@ -1,6 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target ctzll } */
> -/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
>  
>  #define PREC (__CHAR_BIT__ * __SIZEOF_LONG_LONG__)
>  
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c
> index 178945daa8a..2e3be652a0b 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c
> @@ -1,6 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target ctzl } */
> -/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
>  
>  #define PREC (__CHAR_BIT__ * __SIZEOF_LONG__)
>  
> diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
> index 6e130862549..cc763839edc 100644
> --- a/gcc/tree-ssa-loop-niter.cc
> +++ b/gcc/tree-ssa-loop-niter.cc
> @@ -2321,6 +2321,48 @@ is_rshift_by_1 (gassign *stmt)
>    return false;
>  }
>  
> +/* Helper for number_of_iterations_cltz that uses ranger to determine
> +   if SRC's range, shifted left (when LEFT_SHIFT is true) or right
> +   by NUM_IGNORED_BITS, is guaranteed to be != 0 on LOOP's preheader
> +   edge.
> +   Return true if so or false otherwise.  */
> +
> +static bool
> +shifted_range_nonzero_p (loop_p loop, tree src,
> +                      bool left_shift, int num_ignored_bits)
> +{
> +  int_range_max r (TREE_TYPE (src));
> +  gcc_assert (num_ignored_bits >= 0);
> +
> +  if (get_range_query (cfun)->range_on_edge
> +      (r, loop_preheader_edge (loop), src)
> +      && !r.varying_p ()
> +      && !r.undefined_p ())
> +    {
> +      if (num_ignored_bits)
> +     {
> +       range_op_handler op (left_shift ? LSHIFT_EXPR : RSHIFT_EXPR);
> +       int_range_max shifted_range (TREE_TYPE (src));
> +       wide_int shift_count = wi::shwi (num_ignored_bits,
> +                                        TYPE_PRECISION (TREE_TYPE
> +                                                        (src)));
> +       int_range_max shift_amount
> +         (TREE_TYPE (src), shift_count, shift_count);
> +
> +       if (op.fold_range (shifted_range, TREE_TYPE (src), r,
> +                          shift_amount))
> +         r = shifted_range;
> +     }
> +
> +      /* If the range does not contain zero we are good.  */
> +      if (!range_includes_zero_p (r))
> +     return true;
> +    }
> +
> +  return false;
> +}
> +
> +
>  /* See comment below for number_of_iterations_bitcount.
>     For c[lt]z, we have:
>  
> @@ -2438,6 +2480,9 @@ number_of_iterations_cltz (loop_p loop, edge exit,
>    tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
>    int src_precision = TYPE_PRECISION (TREE_TYPE (src));
>  
> +  /* Save the original SSA name before preprocessing for ranger queries.  */
> +  tree unshifted_src = src;
> +
>    /* Apply any needed preprocessing to src.  */
>    int num_ignored_bits;
>    if (left_shift)
> @@ -2463,10 +2508,52 @@ number_of_iterations_cltz (loop_p loop, edge exit,
>  
>    expr = fold_convert (unsigned_type_node, expr);
>  
> -  tree assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
> -                               build_zero_cst (TREE_TYPE (src)));
> +  /* If the copy-header (ch) pass peeled one iteration we're shifting
> +     SRC by preprocessing it above.
> +
> +     A loop like
> +      if (bits)
> +     {
> +       while (!(bits & 1))
> +         {
> +           bits >>= 1;
> +           cnt += 1;
> +         }
> +       return cnt;
> +     }
> +     ch (roughly) transforms into:
> +      if (bits)
> +     {
> +       if (!(bits & 1)
> +         {
> +           do
> +             {
> +               bits >>= 1;
> +               cnt += 1;
> +             } while (!(bits & 1));
> +         }
> +        else
> +          cnt = 1;
> +       return cnt;
> +     }
> +
> +     Then, our preprocessed SRC (that is used for c[tl]z computation)
> +     will be bits >> 1, and the assumption is bits >> 1 != 0.  */
> +
> +  tree assumptions;
> +  if (shifted_range_nonzero_p (loop, unshifted_src,
> +                            left_shift, num_ignored_bits))
> +    assumptions = boolean_true_node;
> +  else
> +    {
> +      /* If ranger couldn't prove the assumption, try
> +      simplify_using_initial_conditions.  */
> +      assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
> +                              build_zero_cst (TREE_TYPE (src)));
> +      assumptions = simplify_using_initial_conditions (loop, assumptions);
> +    }
>  
> -  niter->assumptions = simplify_using_initial_conditions (loop, assumptions);
> +  niter->assumptions = assumptions;
>    niter->may_be_zero = boolean_false_node;
>    niter->niter = simplify_using_initial_conditions (loop, expr);
>  
> diff --git a/gcc/tree-ssa-loop.cc b/gcc/tree-ssa-loop.cc
> index 5629524afb2..dc4b560e924 100644
> --- a/gcc/tree-ssa-loop.cc
> +++ b/gcc/tree-ssa-loop.cc
> @@ -28,6 +28,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tm_p.h"
>  #include "fold-const.h"
>  #include "gimple-iterator.h"
> +#include "gimple-range.h"
>  #include "tree-ssa-loop-ivopts.h"
>  #include "tree-ssa-loop-manip.h"
>  #include "tree-ssa-loop-niter.h"
> @@ -404,11 +405,15 @@ pass_scev_cprop::execute (function *)
>  {
>    bool any = false;
>  
> +  enable_ranger (cfun);
> +
>    /* Perform final value replacement in loops, in case the replacement
>       expressions are cheap.  */
>    for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
>      any |= final_value_replacement_loop (loop);
>  
> +  disable_ranger (cfun);
> +
>    return any ? TODO_cleanup_cfg | TODO_update_ssa_only_virtuals : 0;
>  }
>  
> 

-- 
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to