[Public]

Hi,

This patch add support for enabling POPCNT generation for 32-bit patterns from 
Hacker's delight.

Bootstrapped and tested on x86.

Thank You,

Reshma Roy

gcc/ChangeLog:

* match.pd:

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/popcount7.c: New test.
* gcc.dg/tree-ssa/popcount7_2.c: New test.
* gcc.dg/tree-ssa/popcount8.c: New test.
* gcc.dg/tree-ssa/popcount9.c: New test.


>From 513656c2a431fc5e7215bc9977a084e06f2d6e4b Mon Sep 17 00:00:00 2001
From: Reshma Roy <[email protected]>
Date: Thu, 11 Dec 2025 10:57:53 +0530
Subject: [PATCH] Enabling POPCNT generation for 32-bit pattern from
 Hacker's Delight

Pattern 1:
int Gia_WordCountOnes32c( uint32_t uword )
{
  uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
  uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
  uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
  uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
  return  (uword & 0x0000ffff) + (uword>>16);
  or
  return (uword & 0x0000FFFF) + ((uword >> 16) & 0x0000FFFF);
}

Pattern 2:
int pop(unsigned x) {
  x = x - ((x >> 1) & 0x55555555);
  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
  x = (x + (x >> 4)) & 0x0F0F0F0F;
  x = x + (x >> 8);
  x = x + (x >> 16);
  return x & 0x0000003F;
}

Pattern 3:
int pop(unsigned x) {
  x = x - ((x >> 1) & 0x55555555);
  x = x - 3*((x >> 2) & 0x33333333)
    x = (x + (x >> 4)) & 0x0F0F0F0F;
  x = x + (x >> 8);
  x = x + (x >> 16);
  return x & 0x0000003F;
}
---
 gcc/match.pd                                | 185 ++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/popcount7.c   |  23 +++
 gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c |  23 +++
 gcc/testsuite/gcc.dg/tree-ssa/popcount8.c   |  22 +++
 gcc/testsuite/gcc.dg/tree-ssa/popcount9.c   |  22 +++
 5 files changed, 275 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount9.c

diff --git a/gcc/match.pd b/gcc/match.pd
index bf410a75f5f..380ec0ee694 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10883,6 +10883,191 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (plus (CTZ:type (convert:utype @0)) { build_one_cst (type); }))))
 #endif

+#if GIMPLE
+/* To recognize the popcnt pattern for 32-bit from Hacker's Delight
+   int Gia_WordCountOnes32c( uint32_t uword )
+   {
+   uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
+   uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
+   uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
+   uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
+   return  (uword & 0x0000ffff) + (uword>>16);
+   or
+   return  (uword & 0x0000ffff) + ((uword>>16) & 0x0000ffff);
+   }
+*/
+
+  (simplify
+     (plus:c
+       (bit_and @step_4 INTEGER_CST@9)
+       (rshift
+         (plus:c@step4
+            (bit_and @step3 INTEGER_CST@7)
+         (bit_and
+           (rshift
+             (plus:c@step3
+               (bit_and @step2 INTEGER_CST@5)
+               (bit_and
+                 (rshift
+                   (plus:c@step2
+                     (bit_and @step1 INTEGER_CST@3)
+                     (bit_and
+                       (rshift
+                         (plus:c@step1
+                            (bit_and @0 INTEGER_CST@1)
+                            (bit_and (rshift @0 INTEGER_CST@2) @1))
+                         INTEGER_CST@4)
+                       INTEGER_CST@3))
+                    INTEGER_CST@6)
+                   INTEGER_CST@5))
+               INTEGER_CST@8)
+             INTEGER_CST@7))
+           INTEGER_CST@10))
+   (with {
+    unsigned prec = TYPE_PRECISION (type);
+    int shift = prec & 31 ;
+    unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
+    unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
+    unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
+    unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x00FF00FF) >> shift;
+    unsigned HOST_WIDE_INT c5 = HOST_WIDE_INT_UC (0x0000FFFF) >> shift;
+    }
+    (if (prec >= 16
+         && prec <= 32
+         && pow2p_hwi (prec)
+         && TYPE_UNSIGNED (type)
+         && integer_onep (@2)
+         && wi::to_widest (@4) == 2
+         && wi::to_widest (@6) == 4
+         && wi::to_widest (@8) == 8
+         && wi::to_widest (@10) == 16
+         && tree_to_uhwi (@1) == c1
+         && tree_to_uhwi (@3) == c2
+         && tree_to_uhwi (@5) == c3
+         && tree_to_uhwi (@7) == c4
+         && tree_to_uhwi (@9) == c5)
+       (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
+                                            OPTIMIZE_FOR_BOTH))
+        (convert (IFN_POPCOUNT:type @0))))))
+#endif
+#if GIMPLE
+/*To recognize the popcnt pattern for 32-bit from Hacker's Delight
+int pop(unsigned x) {
+x = x - ((x >> 1) & 0x55555555);
+x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+x = x - 3*((x >> 2) & 0x33333333);
+x = (x + (x >> 4)) & 0x0F0F0F0F;
+x = x + (x >> 8);
+x = x + (x >> 16);
+return x & 0x0000003F
+}
+*/
+(simplify
+ (bit_and
+   (plus
+    (rshift @step4 INTEGER_CST@10)
+     (plus:c@step4
+        (rshift @step3 INTEGER_CST@8)
+         (bit_and@step3
+          (plus
+              (rshift @step2 INTEGER_CST@6)
+                (plus:c@step2
+                  (bit_and @step1 INTEGER_CST@3)
+                  (bit_and
+                    (rshift
+                      (minus@step1
+                          @0
+                          (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1))
+                      INTEGER_CST@4)
+                    INTEGER_CST@3)))
+            INTEGER_CST@5)))
+    INTEGER_CST@7)
+ (with {
+   unsigned prec = TYPE_PRECISION (type);
+   int shift = prec & 31 ;
+   unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
+   unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
+   unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
+   unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >> shift;
+   }
+   (if (prec >= 16
+        && prec <= 32
+        && pow2p_hwi (prec)
+        && TYPE_UNSIGNED (type)
+        && integer_onep (@2)
+        && wi::to_widest (@4) == 2
+        && wi::to_widest (@6) == 4
+        && wi::to_widest (@8) == 8
+        && wi::to_widest (@10) == 16
+        && tree_to_uhwi (@1) == c1
+        && tree_to_uhwi (@3) == c2
+        && tree_to_uhwi (@5) == c3
+        && tree_to_uhwi (@7) == c4)
+     (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
+                                            OPTIMIZE_FOR_BOTH))
+        (convert (IFN_POPCOUNT:type @0))))))
+#endif
+#if GIMPLE
+
+/*To recognize the popcnt pattern for 32-bit from Hacker's Delight
+/*int pop(unsigned x) {
+  x = x - ((x >> 1) & 0x55555555);
+  x = x - 3*((x >> 2) & 0x33333333)
+  x = (x + (x >> 4)) & 0x0F0F0F0F;
+  x = x + (x >> 8);
+  x = x + (x >> 16);
+  return x & 0x0000003F;
+}
+*/
+(simplify
+ (bit_and
+   (plus
+    (rshift @step4 INTEGER_CST@10)
+     (plus:c@step4
+        (rshift @step3 INTEGER_CST@8)
+         (bit_and@step3
+          (plus
+              (rshift @step2 INTEGER_CST@6)
+              (minus@step2
+                 @step1
+                 (mult:c
+                  (bit_and
+                   (rshift
+                    (minus@step1
+                       @0
+                       (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1))
+                    INTEGER_CST@4)
+                  INTEGER_CST@3)
+                 INTEGER_CST@11)))
+            INTEGER_CST@5)))
+    INTEGER_CST@7)
+ (with {
+   unsigned prec = TYPE_PRECISION (type);
+   int shift = prec & 31 ;
+   unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
+   unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
+   unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
+   unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >> shift;
+   }
+   (if (prec >= 16
+        && prec <= 32
+        && pow2p_hwi (prec)
+        && TYPE_UNSIGNED (type)
+        && integer_onep (@2)
+        && wi::to_widest (@4) == 2
+        && wi::to_widest (@6) == 4
+        && wi::to_widest (@8) == 8
+        && wi::to_widest (@10) == 16
+        && wi::to_widest (@11) == 3
+        && tree_to_uhwi (@1) == c1
+        && tree_to_uhwi (@3) == c2
+        && tree_to_uhwi (@5) == c3
+        && tree_to_uhwi (@7) == c4)
+     (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
+                                            OPTIMIZE_FOR_BOTH))
+        (convert (IFN_POPCOUNT:type @0))))))
+#endif
+
 (for ffs (FFS)
  /* __builtin_ffs (X) == 0 -> X == 0.
     __builtin_ffs (X) == 6 -> (X & 63) == 32.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c 
b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
new file mode 100644
index 00000000000..c70837fc53b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcount } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned m1  = 0x55555555UL;
+const unsigned m2  = 0x33333333UL;
+const unsigned m3  = 0x0F0F0F0FUL;
+const unsigned m4  = 0x00FF00FFUL;
+const unsigned m5  = 0x0000FFFFUL;
+
+int Gia_WordCountOnes32c( unsigned uword )
+{
+  uword = (uword & m1) + ((uword>>1) & m1);
+  uword = (uword & m2) + ((uword>>2) & m2);
+  uword = (uword & m3) + ((uword>>4) & m3);
+  uword = (uword & m4) + ((uword>>8) & m4);
+  return  (uword & m5) + (uword>>16);
+}
+
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
new file mode 100644
index 00000000000..fc6c23b411b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcount } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned m1  = 0x55555555UL;
+const unsigned m2  = 0x33333333UL;
+const unsigned m3  = 0x0F0F0F0FUL;
+const unsigned m4  = 0x00FF00FFUL;
+const unsigned m5  = 0x0000FFFFUL;
+
+int Gia_WordCountOnes32c( unsigned uword )
+{
+  uword = (uword & m1) + ((uword>>1) & m1);
+  uword = (uword & m2) + ((uword>>2) & m2);
+  uword = (uword & m3) + ((uword>>4) & m3);
+  uword = (uword & m4) + ((uword>>8) & m4);
+  return  (uword & m5) + ((uword>>16) & m5);
+}
+
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c 
b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
new file mode 100644
index 00000000000..5a12e6892aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcount } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned m1  = 0x55555555UL;
+const unsigned m2  = 0x33333333UL;
+const unsigned m3  = 0x0F0F0F0FUL;
+const unsigned m4  = 0x0000003F;
+
+int pop32c(unsigned x) {
+  x = x - ((x >> 1) & m1);
+  x = (x & m2) + ((x >> 2) & m2);
+  x = (x + (x >> 4)) & m3;
+  x = x + (x >> 8);
+  x = x + (x >> 16);
+  return x & m4;
+}
+
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c 
b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
new file mode 100644
index 00000000000..4fb08d34984
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcount } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned m1  = 0x55555555UL;
+const unsigned m2  = 0x33333333UL;
+const unsigned m3  = 0x0F0F0F0FUL;
+const unsigned m4  = 0x0000003F;
+
+int popc(unsigned x) {
+  x = x - ((x >> 1) & m1);
+  x = x - 3*((x >> 2) & m2);
+  x = (x + (x >> 4)) & m3;
+  x = x + (x >> 8);
+  x = x + (x >> 16);
+  return x & m4;
+}
+
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+
+
--
2.34.1

Reply via email to