This is the latest iteration of my patch from August 2020
https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552391.html
incorporating feedback and suggestions from reviewers.

This patch adds match.pd simplifications that optimize away bit permutation
operations, specifically bswap and rotate, inside calls to popcount and
parity: permuting the bits of the argument changes neither the number of
set bits nor their parity, so the permutation can be dropped.
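
As a concrete illustration (function names are just for exposition, and
the example assumes a 32-bit unsigned int so that __builtin_bswap32 and
the shift-by-28 rotate idiom apply), both functions below now reduce to a
plain popcount/parity of x:

int popcount_bswap (unsigned int x)
{
  /* The byte swap only permutes bits, so it is folded away and this
     becomes __builtin_popcount (x).  */
  return __builtin_popcount (__builtin_bswap32 (x));
}

int parity_rotate (unsigned int x)
{
  /* The rotate-by-4 idiom is likewise folded away before computing
     the parity.  */
  unsigned int y = (x >> 4) | (x << 28);
  return __builtin_parity (y);
}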

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2023-05-10  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * match.pd <popcount optimizations>: Simplify popcount(bswap(x))
        as popcount(x).  Simplify popcount(rotate(x,y)) as popcount(x).
        <parity optimizations>:  Simplify parity(bswap(x)) as parity(x).
        Simplify parity(rotate(x,y)) as parity(x).

gcc/testsuite/ChangeLog
        * gcc.dg/fold-parity-6.c: New test.
        * gcc.dg/fold-parity-7.c: New test.
        * gcc.dg/fold-popcount-6.c: New test.
        * gcc.dg/fold-popcount-7.c: New test.


Thanks again,
Roger
--

diff --git a/gcc/match.pd b/gcc/match.pd
index ceae1c3..bc083be 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7766,6 +7766,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (cmp (popcount @0) integer_zerop)
       (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
 
+/* popcount(bswap(x)) is popcount(x).  */
+(for popcount (POPCOUNT)
+  (for bswap (BUILT_IN_BSWAP16 BUILT_IN_BSWAP32
+             BUILT_IN_BSWAP64 BUILT_IN_BSWAP128)
+    (simplify
+      (popcount (convert?@0 (bswap:s@1 @2)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+          && INTEGRAL_TYPE_P (TREE_TYPE (@1)))
+       (with { unsigned int prec0 = TYPE_PRECISION (TREE_TYPE (@0));
+               unsigned int prec1 = TYPE_PRECISION (TREE_TYPE (@1)); }
+         (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (TREE_TYPE (@1))))
+           (popcount @2)))))))
+
+/* popcount(rotate(X,Y)) is popcount(X).  */
+(for popcount (POPCOUNT)
+  (for rot (lrotate rrotate)
+    (simplify
+      (popcount (convert?@0 (rot:s@1 @2 @3)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+          && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+          && (GIMPLE || !TREE_SIDE_EFFECTS (@3)))
+       (with { unsigned int prec0 = TYPE_PRECISION (TREE_TYPE (@0));
+               unsigned int prec1 = TYPE_PRECISION (TREE_TYPE (@1)); }
+         (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (TREE_TYPE (@1))))
+           (popcount @2)))))))
+
 /* Canonicalize POPCOUNT(x)&1 as PARITY(X).  */
 (simplify
   (bit_and (POPCOUNT @0) integer_onep)
@@ -7777,6 +7803,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (PARITY (bit_not @0))
   (PARITY @0))
 
+/* parity(bswap(x)) is parity(x).  */
+(for parity (PARITY)
+  (for bswap (BUILT_IN_BSWAP16 BUILT_IN_BSWAP32
+             BUILT_IN_BSWAP64 BUILT_IN_BSWAP128)
+    (simplify
+      (parity (convert?@0 (bswap:s@1 @2)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+          && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+          && TYPE_PRECISION (TREE_TYPE (@0))
+             >= TYPE_PRECISION (TREE_TYPE (@1)))
+       (parity @2)))))
+
+/* parity(rotate(X,Y)) is parity(X).  */
+(for parity (PARITY)
+  (for rot (lrotate rrotate)
+    (simplify
+      (parity (convert?@0 (rot:s@1 @2 @3)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+          && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+          && (GIMPLE || !TREE_SIDE_EFFECTS (@3))
+          && TYPE_PRECISION (TREE_TYPE (@0))
+             >= TYPE_PRECISION (TREE_TYPE (@1)))
+       (parity @2)))))
+
 /* parity(X)^parity(Y) is parity(X^Y).  */
 (simplify
   (bit_xor (PARITY:s @0) (PARITY:s @1))
diff --git a/gcc/testsuite/gcc.dg/fold-parity-6.c b/gcc/testsuite/gcc.dg/fold-parity-6.c
new file mode 100644
index 0000000..623afb9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-parity-6.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  return __builtin_parity (__builtin_bswap32(x));
+#elif __SIZEOF_INT__ == 2
+  return __builtin_parity (__builtin_bswap16(x));
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  return __builtin_parityl (__builtin_bswap64(x));
+#elif __SIZEOF_LONG__ == 4
+  return __builtin_parityl (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  return __builtin_parityll (__builtin_bswap64(x));
+#elif __SIZEOF_LONG_LONG__ == 4
+  return __builtin_parityll (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-parity-7.c b/gcc/testsuite/gcc.dg/fold-parity-7.c
new file mode 100644
index 0000000..c08cdee
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-parity-7.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  unsigned int y = (x>>4) | (x<<28);
+  return __builtin_parity(y);
+#elif __SIZEOF_INT__ == 2
+  unsigned int y = (x>>4) | (x<<12);
+  return __builtin_parity(y);
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  unsigned long y = (x>>4) | (x<<60);
+  return __builtin_parityl (y);
+#elif __SIZEOF_LONG__ == 4
+  unsigned long y = (x>>4) | (x<<28);
+  return __builtin_parityl (y);
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  unsigned long long y = (x>>4) | (x<<60);
+  return __builtin_parityll (y);
+#elif __SIZEOF_LONG_LONG__ == 4
+  unsigned long long y = (x>>4) | (x<<28);
+  return __builtin_parityll (y);
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times " r>> " 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-popcount-6.c b/gcc/testsuite/gcc.dg/fold-popcount-6.c
new file mode 100644
index 0000000..e7f38d0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-popcount-6.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  return __builtin_popcount (__builtin_bswap32(x));
+#elif __SIZEOF_INT__ == 2
+  return __builtin_popcount (__builtin_bswap16(x));
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  return __builtin_popcountl (__builtin_bswap64(x));
+#elif __SIZEOF_LONG__ == 4
+  return __builtin_popcountl (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  return __builtin_popcountll (__builtin_bswap64(x));
+#elif __SIZEOF_LONG_LONG__ == 4
+  return __builtin_popcountll (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-popcount-7.c b/gcc/testsuite/gcc.dg/fold-popcount-7.c
new file mode 100644
index 0000000..dcfdf5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-popcount-7.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  unsigned int y = (x>>4) | (x<<28);
+  return __builtin_popcount(y);
+#elif __SIZEOF_INT__ == 2
+  unsigned int y = (x>>4) | (x<<12);
+  return __builtin_popcount(y);
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  unsigned long y = (x>>4) | (x<<60);
+  return __builtin_popcountl (y);
+#elif __SIZEOF_LONG__ == 4
+  unsigned long y = (x>>4) | (x<<28);
+  return __builtin_popcountl (y);
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  unsigned long long y = (x>>4) | (x<<60);
+  return __builtin_popcountll (y);
+#elif __SIZEOF_LONG_LONG__ == 4
+  unsigned long long y = (x>>4) | (x<<28);
+  return __builtin_popcountll (y);
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times " r>> " 0 "optimized" } } */
