On Fri, 23 Jan 2026, Roy, Reshma wrote:
> [Public]
>
> Hi,
>
> This patch add support for enabling POPCNT generation for 32-bit patterns
> from Hacker's delight.
>
> Bootstrapped and tested on x86.
>
> Thank You,
>
> Reshma Roy
>
> gcc/ChangeLog:
>
> * match.pd:
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/popcount7.c: New test.
> * gcc.dg/tree-ssa/popcount7_2.c: New test.
> * gcc.dg/tree-ssa/popcount8.c: New test.
> * gcc.dg/tree-ssa/popcount9.c: New test.
>
>
> From 513656c2a431fc5e7215bc9977a084e06f2d6e4b Mon Sep 17 00:00:00 2001
> From: Reshma Roy <[email protected]>
> Date: Thu, 11 Dec 2025 10:57:53 +0530
> Subject: [PATCH] Enabling POPCNT generation for 32-bit pattern from
> Hacker's Delight
>
> Pattern 1:
> int Gia_WordCountOnes32c( uint32_t uword )
> {
> uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
> uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
> uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
> uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
> return (uword & 0x0000ffff) + (uword>>16);
> or
> return (uword & 0x0000FFFF) + ((uword >> 16) & 0x0000FFFF);
> }
>
> Pattern 2:
> int pop(unsigned x) {
> x = x - ((x >> 1) & 0x55555555);
> x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
> x = (x + (x >> 4)) & 0x0F0F0F0F;
> x = x + (x >> 8);
> x = x + (x >> 16);
> return x & 0x0000003F;
> }
>
> Pattern 3:
> int pop(unsigned x) {
> x = x - ((x >> 1) & 0x55555555);
> x = x - 3*((x >> 2) & 0x33333333)
> x = (x + (x >> 4)) & 0x0F0F0F0F;
> x = x + (x >> 8);
> x = x + (x >> 16);
> return x & 0x0000003F;
> }
> ---
> gcc/match.pd | 185 ++++++++++++++++++++
> gcc/testsuite/gcc.dg/tree-ssa/popcount7.c | 23 +++
> gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c | 23 +++
> gcc/testsuite/gcc.dg/tree-ssa/popcount8.c | 22 +++
> gcc/testsuite/gcc.dg/tree-ssa/popcount9.c | 22 +++
> 5 files changed, 275 insertions(+)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index bf410a75f5f..380ec0ee694 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -10883,6 +10883,191 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (plus (CTZ:type (convert:utype @0)) { build_one_cst (type); }))))
> #endif
>
> +#if GIMPLE
> +/* To recognize the popcnt pattern for 32-bit from Hacker's Delight
> + int Gia_WordCountOnes32c( uint32_t uword )
> + {
> + uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
> + uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
> + uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
> + uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
> + return (uword & 0x0000ffff) + (uword>>16);
> + or
> + return (uword & 0x0000ffff) + ((uword>>16) & 0x0000ffff);
> + }
> +*/
> +
> + (simplify
> + (plus:c
> + (bit_and @step_4 INTEGER_CST@9)
> + (rshift
> + (plus:c@step4
> + (bit_and @step3 INTEGER_CST@7)
> + (bit_and
> + (rshift
> + (plus:c@step3
> + (bit_and @step2 INTEGER_CST@5)
> + (bit_and
> + (rshift
> + (plus:c@step2
> + (bit_and @step1 INTEGER_CST@3)
> + (bit_and
> + (rshift
> + (plus:c@step1
> + (bit_and @0 INTEGER_CST@1)
> + (bit_and (rshift @0 INTEGER_CST@2) @1))
> + INTEGER_CST@4)
> + INTEGER_CST@3))
> + INTEGER_CST@6)
> + INTEGER_CST@5))
> + INTEGER_CST@8)
> + INTEGER_CST@7))
> + INTEGER_CST@10))
> + (with {
> + unsigned prec = TYPE_PRECISION (type);
> + int shift = prec & 31 ;
> + unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
> + unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
> + unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
> + unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x00FF00FF) >> shift;
> + unsigned HOST_WIDE_INT c5 = HOST_WIDE_INT_UC (0x0000FFFF) >> shift;
> + }
> + (if (prec >= 16
> + && prec <= 32
> + && pow2p_hwi (prec)
> + && TYPE_UNSIGNED (type)
> + && integer_onep (@2)
> + && wi::to_widest (@4) == 2
> + && wi::to_widest (@6) == 4
> + && wi::to_widest (@8) == 8
> + && wi::to_widest (@10) == 16
> + && tree_to_uhwi (@1) == c1
> + && tree_to_uhwi (@3) == c2
> + && tree_to_uhwi (@5) == c3
> + && tree_to_uhwi (@7) == c4
> + && tree_to_uhwi (@9) == c5)
> + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> + OPTIMIZE_FOR_BOTH))
> + (convert (IFN_POPCOUNT:type @0))))))
> +#endif
> +#if GIMPLE
> +/*To recognize the popcnt pattern for 32-bit from Hacker's Delight
> +int pop(unsigned x) {
> +x = x - ((x >> 1) & 0x55555555);
> +x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
> +x = x - 3*((x >> 2) & 0x33333333);
> +x = (x + (x >> 4)) & 0x0F0F0F0F;
> +x = x + (x >> 8);
> +x = x + (x >> 16);
> +return x & 0x0000003F
oddly enoug the existing
int popcount32c (uint32_t x)
{
x -= (x >> 1) & 0x55555555;
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
x = (x + (x >> 4)) & 0x0f0f0f0f;
return (x * 0x01010101) >> 24;
looks a lot simpler?
Please put the new patterns next to the existing one, before the
FFS simplification.
Otherwise this looks like a straight-forward addition of another
popcount form, but it has to wait for stage1 to land.
I do wonder whether it's possible to use the CRC stmt-simulation
infrastructure to detect more variants of the bit inspection
builtins without enumerating them in match.pd.
Thanks,
Richard.
> +}
> +*/
> +(simplify
> + (bit_and
> + (plus
> + (rshift @step4 INTEGER_CST@10)
> + (plus:c@step4
> + (rshift @step3 INTEGER_CST@8)
> + (bit_and@step3
> + (plus
> + (rshift @step2 INTEGER_CST@6)
> + (plus:c@step2
> + (bit_and @step1 INTEGER_CST@3)
> + (bit_and
> + (rshift
> + (minus@step1
> + @0
> + (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1))
> + INTEGER_CST@4)
> + INTEGER_CST@3)))
> + INTEGER_CST@5)))
> + INTEGER_CST@7)
> + (with {
> + unsigned prec = TYPE_PRECISION (type);
> + int shift = prec & 31 ;
> + unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
> + unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
> + unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
> + unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >> shift;
> + }
> + (if (prec >= 16
> + && prec <= 32
> + && pow2p_hwi (prec)
> + && TYPE_UNSIGNED (type)
> + && integer_onep (@2)
> + && wi::to_widest (@4) == 2
> + && wi::to_widest (@6) == 4
> + && wi::to_widest (@8) == 8
> + && wi::to_widest (@10) == 16
> + && tree_to_uhwi (@1) == c1
> + && tree_to_uhwi (@3) == c2
> + && tree_to_uhwi (@5) == c3
> + && tree_to_uhwi (@7) == c4)
> + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> + OPTIMIZE_FOR_BOTH))
> + (convert (IFN_POPCOUNT:type @0))))))
> +#endif
> +#if GIMPLE
> +
> +/*To recognize the popcnt pattern for 32-bit from Hacker's Delight
> +/*int pop(unsigned x) {
> + x = x - ((x >> 1) & 0x55555555);
> + x = x - 3*((x >> 2) & 0x33333333)
> + x = (x + (x >> 4)) & 0x0F0F0F0F;
> + x = x + (x >> 8);
> + x = x + (x >> 16);
> + return x & 0x0000003F;
> +}
> +*/
> +(simplify
> + (bit_and
> + (plus
> + (rshift @step4 INTEGER_CST@10)
> + (plus:c@step4
> + (rshift @step3 INTEGER_CST@8)
> + (bit_and@step3
> + (plus
> + (rshift @step2 INTEGER_CST@6)
> + (minus@step2
> + @step1
> + (mult:c
> + (bit_and
> + (rshift
> + (minus@step1
> + @0
> + (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1))
> + INTEGER_CST@4)
> + INTEGER_CST@3)
> + INTEGER_CST@11)))
> + INTEGER_CST@5)))
> + INTEGER_CST@7)
> + (with {
> + unsigned prec = TYPE_PRECISION (type);
> + int shift = prec & 31 ;
> + unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
> + unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
> + unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
> + unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >> shift;
> + }
> + (if (prec >= 16
> + && prec <= 32
> + && pow2p_hwi (prec)
> + && TYPE_UNSIGNED (type)
> + && integer_onep (@2)
> + && wi::to_widest (@4) == 2
> + && wi::to_widest (@6) == 4
> + && wi::to_widest (@8) == 8
> + && wi::to_widest (@10) == 16
> + && wi::to_widest (@11) == 3
> + && tree_to_uhwi (@1) == c1
> + && tree_to_uhwi (@3) == c2
> + && tree_to_uhwi (@5) == c3
> + && tree_to_uhwi (@7) == c4)
> + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> + OPTIMIZE_FOR_BOTH))
> + (convert (IFN_POPCOUNT:type @0))))))
> +#endif
> +
> (for ffs (FFS)
> /* __builtin_ffs (X) == 0 -> X == 0.
> __builtin_ffs (X) == 6 -> (X & 63) == 32. */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> new file mode 100644
> index 00000000000..c70837fc53b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target popcount } */
> +/* { dg-require-effective-target int32plus } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +const unsigned m1 = 0x55555555UL;
> +const unsigned m2 = 0x33333333UL;
> +const unsigned m3 = 0x0F0F0F0FUL;
> +const unsigned m4 = 0x00FF00FFUL;
> +const unsigned m5 = 0x0000FFFFUL;
> +
> +int Gia_WordCountOnes32c( unsigned uword )
> +{
> + uword = (uword & m1) + ((uword>>1) & m1);
> + uword = (uword & m2) + ((uword>>2) & m2);
> + uword = (uword & m3) + ((uword>>4) & m3);
> + uword = (uword & m4) + ((uword>>8) & m4);
> + return (uword & m5) + (uword>>16);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> new file mode 100644
> index 00000000000..fc6c23b411b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target popcount } */
> +/* { dg-require-effective-target int32plus } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +const unsigned m1 = 0x55555555UL;
> +const unsigned m2 = 0x33333333UL;
> +const unsigned m3 = 0x0F0F0F0FUL;
> +const unsigned m4 = 0x00FF00FFUL;
> +const unsigned m5 = 0x0000FFFFUL;
> +
> +int Gia_WordCountOnes32c( unsigned uword )
> +{
> + uword = (uword & m1) + ((uword>>1) & m1);
> + uword = (uword & m2) + ((uword>>2) & m2);
> + uword = (uword & m3) + ((uword>>4) & m3);
> + uword = (uword & m4) + ((uword>>8) & m4);
> + return (uword & m5) + ((uword>>16) & m5);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> new file mode 100644
> index 00000000000..5a12e6892aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target popcount } */
> +/* { dg-require-effective-target int32plus } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +const unsigned m1 = 0x55555555UL;
> +const unsigned m2 = 0x33333333UL;
> +const unsigned m3 = 0x0F0F0F0FUL;
> +const unsigned m4 = 0x0000003F;
> +
> +int pop32c(unsigned x) {
> + x = x - ((x >> 1) & m1);
> + x = (x & m2) + ((x >> 2) & m2);
> + x = (x + (x >> 4)) & m3;
> + x = x + (x >> 8);
> + x = x + (x >> 16);
> + return x & m4;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> new file mode 100644
> index 00000000000..4fb08d34984
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target popcount } */
> +/* { dg-require-effective-target int32plus } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +const unsigned m1 = 0x55555555UL;
> +const unsigned m2 = 0x33333333UL;
> +const unsigned m3 = 0x0F0F0F0FUL;
> +const unsigned m4 = 0x0000003F;
> +
> +int popc(unsigned x) {
> + x = x - ((x >> 1) & m1);
> + x = x - 3*((x >> 2) & m2);
> + x = (x + (x >> 4)) & m3;
> + x = x + (x >> 8);
> + x = x + (x >> 16);
> + return x & m4;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
> +
> +
> --
> 2.34.1
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)