Hi,

When niter runs after the copy-header pass it sometimes fails to
simplify assumptions in a ctz loop.
As the assumption is a simple nonzero test here and we
know that a val >> 1 != 0 (or val << 1 != 0) if val & 1 != 0 we
can elide the shift when simplifying the assumption.

This helps recognize a ctz loop in 502.gcc's compute_transp.

Regtested on x86, power10, and rv64gcbv_zvl512b.

Regards
 Robin

        PR/tree-optimization 122207

gcc/ChangeLog:

        * tree-ssa-loop-niter.cc (number_of_iterations_cltz): Use
        unshifted src.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/ctz-char.c: Remove -fno-tree-ch.
        * gcc.dg/tree-ssa/ctz-int.c: Ditto.
        * gcc.dg/tree-ssa/ctz-long-long.c: Ditto.
        * gcc.dg/tree-ssa/ctz-long.c: Ditto.
        * gcc.dg/tree-ssa/ctz-ch.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c        | 23 ++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c      |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c       |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c      |  2 +-
 gcc/tree-ssa-loop-niter.cc                    | 45 ++++++++++++++++++-
 6 files changed, 70 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
new file mode 100644
index 00000000000..5d725979971
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+typedef unsigned long BITMAP_WORD;
+
+bool
+bmp_iter_set (BITMAP_WORD bits, unsigned *bit_no)
+{
+  /* If our current word is nonzero, it contains the bit we want.  */
+  if (bits)
+    {
+      while (!(bits & 1))
+       {
+         bits >>= 1;
+         *bit_no += 1;
+       }
+      return true;
+    }
+
+  return false;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } 
*/
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c
index 3cd166acbd4..fa8b7f39de4 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctz } */
-/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__)
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c
index 7f63493eb73..2bf3ae69b93 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctz } */
-/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c
index 924f61b76f0..2e159485cb9 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctzll } */
-/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__ * __SIZEOF_LONG_LONG__)
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c
index 178945daa8a..2e3be652a0b 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctzl } */
-/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__ * __SIZEOF_LONG__)
 
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 6e130862549..bda583e9e2d 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2438,6 +2438,9 @@ number_of_iterations_cltz (loop_p loop, edge exit,
   tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
   int src_precision = TYPE_PRECISION (TREE_TYPE (src));
 
+  /* Save the non-preprocessed SRC.  */
+  tree unshifted_src = src;
+
   /* Apply any needed preprocessing to src.  */
   int num_ignored_bits;
   if (left_shift)
@@ -2463,8 +2466,46 @@ number_of_iterations_cltz (loop_p loop, edge exit,
 
   expr = fold_convert (unsigned_type_node, expr);
 
-  tree assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
-                                 build_zero_cst (TREE_TYPE (src)));
+  /* If the copy-header (ch) pass peeled one iteration we're shifting
+     SRC by preprocessing it above.
+
+     A loop like
+      if (bits)
+       {
+         while (!(bits & 1))
+           {
+             bits >>= 1;
+             cnt += 1;
+           }
+         return cnt;
+       }
+     ch (roughly) transforms into:
+      if (bits)
+       {
+         if (!(bits & 1)
+           {
+             do
+               {
+                 bits >>= 1;
+                 cnt += 1;
+               } while (!(bits & 1));
+           }
+          else
+            cnt = 1;
+         return cnt;
+       }
+
+     Then, our preprocessed SRC (that is used for c[tl]z computation)
+     will be bits >> 1, the assumption bits >> 1 != 0.
+     In simplify_using_initial_conditions we walk dominating conditions
+     to check if the assumption is unconditional.
+     As bits >>/<< 1 != 0 is true when bits != 0 we can use the
+     unshifted src to simplify assumptions instead.
+
+     A range query would work as well but since the boundary conditions
+     are very restricted this simpler approach is sufficient.  */
+  tree assumptions = fold_build2 (NE_EXPR, boolean_type_node, unshifted_src,
+                                 build_zero_cst (TREE_TYPE (unshifted_src)));
 
   niter->assumptions = simplify_using_initial_conditions (loop, assumptions);
   niter->may_be_zero = boolean_false_node;
-- 
2.51.0

Reply via email to