[AMD Official Use Only - AMD Internal Distribution Only]
> -----Original Message-----
> From: Richard Biener <[email protected]>
> Sent: Friday, January 23, 2026 1:30 PM
> To: Roy, Reshma <[email protected]>
> Cc: [email protected]; Kumar, Venkataramanan
> <[email protected]>
> Subject: Re: [PATCH] Enabling POPCNT generation for 32-bit pattern from
> Hacker's
> Delight
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> On Fri, 23 Jan 2026, Roy, Reshma wrote:
>
> > [Public]
> >
> > Hi,
> >
> > This patch add support for enabling POPCNT generation for 32-bit patterns
> > from
> Hacker's delight.
> >
> > Bootstrapped and tested on x86.
> >
> > Thank You,
> >
> > Reshma Roy
> >
> > gcc/ChangeLog:
> >
> > * match.pd:
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/tree-ssa/popcount7.c: New test.
> > * gcc.dg/tree-ssa/popcount7_2.c: New test.
> > * gcc.dg/tree-ssa/popcount8.c: New test.
> > * gcc.dg/tree-ssa/popcount9.c: New test.
> >
> >
> > From 513656c2a431fc5e7215bc9977a084e06f2d6e4b Mon Sep 17 00:00:00 2001
> > From: Reshma Roy <[email protected]>
> > Date: Thu, 11 Dec 2025 10:57:53 +0530
> > Subject: [PATCH] Enabling POPCNT generation for 32-bit pattern from
> > Hacker's Delight
> >
> > Pattern 1:
> > int Gia_WordCountOnes32c( uint32_t uword ) {
> > uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
> > uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
> > uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
> > uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
> > return (uword & 0x0000ffff) + (uword>>16);
> > or
> > return (uword & 0x0000FFFF) + ((uword >> 16) & 0x0000FFFF); }
> >
> > Pattern 2:
> > int pop(unsigned x) {
> > x = x - ((x >> 1) & 0x55555555);
> > x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
> > x = (x + (x >> 4)) & 0x0F0F0F0F;
> > x = x + (x >> 8);
> > x = x + (x >> 16);
> > return x & 0x0000003F;
> > }
> >
> > Pattern 3:
> > int pop(unsigned x) {
> > x = x - ((x >> 1) & 0x55555555);
> > x = x - 3*((x >> 2) & 0x33333333)
> > x = (x + (x >> 4)) & 0x0F0F0F0F;
> > x = x + (x >> 8);
> > x = x + (x >> 16);
> > return x & 0x0000003F;
> > }
> > ---
> > gcc/match.pd | 185 ++++++++++++++++++++
> > gcc/testsuite/gcc.dg/tree-ssa/popcount7.c | 23 +++
> > gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c | 23 +++
> > gcc/testsuite/gcc.dg/tree-ssa/popcount8.c | 22 +++
> > gcc/testsuite/gcc.dg/tree-ssa/popcount9.c | 22 +++
> > 5 files changed, 275 insertions(+)
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd index
> > bf410a75f5f..380ec0ee694 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -10883,6 +10883,191 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > (plus (CTZ:type (convert:utype @0)) { build_one_cst (type); }))))
> > #endif
> >
> > +#if GIMPLE
> > +/* To recognize the popcnt pattern for 32-bit from Hacker's Delight
> > + int Gia_WordCountOnes32c( uint32_t uword )
> > + {
> > + uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
> > + uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
> > + uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
> > + uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
> > + return (uword & 0x0000ffff) + (uword>>16);
> > + or
> > + return (uword & 0x0000ffff) + ((uword>>16) & 0x0000ffff);
> > + }
> > +*/
> > +
> > + (simplify
> > + (plus:c
> > + (bit_and @step_4 INTEGER_CST@9)
> > + (rshift
> > + (plus:c@step4
> > + (bit_and @step3 INTEGER_CST@7)
> > + (bit_and
> > + (rshift
> > + (plus:c@step3
> > + (bit_and @step2 INTEGER_CST@5)
> > + (bit_and
> > + (rshift
> > + (plus:c@step2
> > + (bit_and @step1 INTEGER_CST@3)
> > + (bit_and
> > + (rshift
> > + (plus:c@step1
> > + (bit_and @0 INTEGER_CST@1)
> > + (bit_and (rshift @0 INTEGER_CST@2) @1))
> > + INTEGER_CST@4)
> > + INTEGER_CST@3))
> > + INTEGER_CST@6)
> > + INTEGER_CST@5))
> > + INTEGER_CST@8)
> > + INTEGER_CST@7))
> > + INTEGER_CST@10))
> > + (with {
> > + unsigned prec = TYPE_PRECISION (type);
> > + int shift = prec & 31 ;
> > + unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >>
> shift;
> > + unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >>
> shift;
> > + unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >>
> shift;
> > + unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x00FF00FF) >>
> shift;
> > + unsigned HOST_WIDE_INT c5 = HOST_WIDE_INT_UC (0x0000FFFF) >>
> shift;
> > + }
> > + (if (prec >= 16
> > + && prec <= 32
> > + && pow2p_hwi (prec)
> > + && TYPE_UNSIGNED (type)
> > + && integer_onep (@2)
> > + && wi::to_widest (@4) == 2
> > + && wi::to_widest (@6) == 4
> > + && wi::to_widest (@8) == 8
> > + && wi::to_widest (@10) == 16
> > + && tree_to_uhwi (@1) == c1
> > + && tree_to_uhwi (@3) == c2
> > + && tree_to_uhwi (@5) == c3
> > + && tree_to_uhwi (@7) == c4
> > + && tree_to_uhwi (@9) == c5)
> > + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> > + OPTIMIZE_FOR_BOTH))
> > + (convert (IFN_POPCOUNT:type @0)))))) #endif #if GIMPLE /*To
> > +recognize the popcnt pattern for 32-bit from Hacker's Delight int
> > +pop(unsigned x) { x = x - ((x >> 1) & 0x55555555); x = (x &
> > +0x33333333) + ((x >> 2) & 0x33333333); x = x - 3*((x >> 2) &
> > +0x33333333); x = (x + (x >> 4)) & 0x0F0F0F0F; x = x + (x >> 8); x = x
> > ++ (x >> 16); return x & 0x0000003F
>
> oddly enoug the existing
>
> int popcount32c (uint32_t x)
> {
> x -= (x >> 1) & 0x55555555;
> x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
> x = (x + (x >> 4)) & 0x0f0f0f0f;
> return (x * 0x01010101) >> 24;
>
> looks a lot simpler?
Yes, it is simpler. I think the new pattern, even though complex than the
existing one,
ensures GCC recognizes more variations of manual bit-counting.
>
> Please put the new patterns next to the existing one, before the FFS
> simplification.
The changes were made and the updated patch is provided in the mail.
>
> Otherwise this looks like a straight-forward addition of another popcount
> form, but it
> has to wait for stage1 to land.
Noted.
>
> I do wonder whether it's possible to use the CRC stmt-simulation
> infrastructure to
> detect more variants of the bit inspection builtins without enumerating them
> in
> match.pd.
I will explore the possibility of using the CRC stmt-simulation infrastructure.
Meanwhile, can we
introduce the patterns into match.pd now, and later migrate the patterns into
CRC infrastructure,
including the newly added ones, without losing them?
>
> Thanks,
> Richard.
>
> > +}
> > +*/
> > +(simplify
> > + (bit_and
> > + (plus
> > + (rshift @step4 INTEGER_CST@10)
> > + (plus:c@step4
> > + (rshift @step3 INTEGER_CST@8)
> > + (bit_and@step3
> > + (plus
> > + (rshift @step2 INTEGER_CST@6)
> > + (plus:c@step2
> > + (bit_and @step1 INTEGER_CST@3)
> > + (bit_and
> > + (rshift
> > + (minus@step1
> > + @0
> > + (bit_and (rshift @0 INTEGER_CST@2)
> > INTEGER_CST@1))
> > + INTEGER_CST@4)
> > + INTEGER_CST@3)))
> > + INTEGER_CST@5)))
> > + INTEGER_CST@7)
> > + (with {
> > + unsigned prec = TYPE_PRECISION (type);
> > + int shift = prec & 31 ;
> > + unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >>
> shift;
> > + unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >>
> shift;
> > + unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >>
> shift;
> > + unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >>
> shift;
> > + }
> > + (if (prec >= 16
> > + && prec <= 32
> > + && pow2p_hwi (prec)
> > + && TYPE_UNSIGNED (type)
> > + && integer_onep (@2)
> > + && wi::to_widest (@4) == 2
> > + && wi::to_widest (@6) == 4
> > + && wi::to_widest (@8) == 8
> > + && wi::to_widest (@10) == 16
> > + && tree_to_uhwi (@1) == c1
> > + && tree_to_uhwi (@3) == c2
> > + && tree_to_uhwi (@5) == c3
> > + && tree_to_uhwi (@7) == c4)
> > + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> > + OPTIMIZE_FOR_BOTH))
> > + (convert (IFN_POPCOUNT:type @0)))))) #endif #if GIMPLE
> > +
> > +/*To recognize the popcnt pattern for 32-bit from Hacker's Delight
> > +/*int pop(unsigned x) {
> > + x = x - ((x >> 1) & 0x55555555);
> > + x = x - 3*((x >> 2) & 0x33333333)
> > + x = (x + (x >> 4)) & 0x0F0F0F0F;
> > + x = x + (x >> 8);
> > + x = x + (x >> 16);
> > + return x & 0x0000003F;
> > +}
> > +*/
> > +(simplify
> > + (bit_and
> > + (plus
> > + (rshift @step4 INTEGER_CST@10)
> > + (plus:c@step4
> > + (rshift @step3 INTEGER_CST@8)
> > + (bit_and@step3
> > + (plus
> > + (rshift @step2 INTEGER_CST@6)
> > + (minus@step2
> > + @step1
> > + (mult:c
> > + (bit_and
> > + (rshift
> > + (minus@step1
> > + @0
> > + (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1))
> > + INTEGER_CST@4)
> > + INTEGER_CST@3)
> > + INTEGER_CST@11)))
> > + INTEGER_CST@5)))
> > + INTEGER_CST@7)
> > + (with {
> > + unsigned prec = TYPE_PRECISION (type);
> > + int shift = prec & 31 ;
> > + unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >>
> shift;
> > + unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >>
> shift;
> > + unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >>
> shift;
> > + unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >>
> shift;
> > + }
> > + (if (prec >= 16
> > + && prec <= 32
> > + && pow2p_hwi (prec)
> > + && TYPE_UNSIGNED (type)
> > + && integer_onep (@2)
> > + && wi::to_widest (@4) == 2
> > + && wi::to_widest (@6) == 4
> > + && wi::to_widest (@8) == 8
> > + && wi::to_widest (@10) == 16
> > + && wi::to_widest (@11) == 3
> > + && tree_to_uhwi (@1) == c1
> > + && tree_to_uhwi (@3) == c2
> > + && tree_to_uhwi (@5) == c3
> > + && tree_to_uhwi (@7) == c4)
> > + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> > + OPTIMIZE_FOR_BOTH))
> > + (convert (IFN_POPCOUNT:type @0)))))) #endif
> > +
> > (for ffs (FFS)
> > /* __builtin_ffs (X) == 0 -> X == 0.
> > __builtin_ffs (X) == 6 -> (X & 63) == 32. */ diff --git
> > a/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> > b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> > new file mode 100644
> > index 00000000000..c70837fc53b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> > @@ -0,0 +1,23 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target popcount } */
> > +/* { dg-require-effective-target int32plus } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +const unsigned m1 = 0x55555555UL;
> > +const unsigned m2 = 0x33333333UL;
> > +const unsigned m3 = 0x0F0F0F0FUL;
> > +const unsigned m4 = 0x00FF00FFUL;
> > +const unsigned m5 = 0x0000FFFFUL;
> > +
> > +int Gia_WordCountOnes32c( unsigned uword ) {
> > + uword = (uword & m1) + ((uword>>1) & m1);
> > + uword = (uword & m2) + ((uword>>2) & m2);
> > + uword = (uword & m3) + ((uword>>4) & m3);
> > + uword = (uword & m4) + ((uword>>8) & m4);
> > + return (uword & m5) + (uword>>16); }
> > +
> > +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } }
> > +*/
> > +
> > +
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> > b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> > new file mode 100644
> > index 00000000000..fc6c23b411b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> > @@ -0,0 +1,23 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target popcount } */
> > +/* { dg-require-effective-target int32plus } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +const unsigned m1 = 0x55555555UL;
> > +const unsigned m2 = 0x33333333UL;
> > +const unsigned m3 = 0x0F0F0F0FUL;
> > +const unsigned m4 = 0x00FF00FFUL;
> > +const unsigned m5 = 0x0000FFFFUL;
> > +
> > +int Gia_WordCountOnes32c( unsigned uword ) {
> > + uword = (uword & m1) + ((uword>>1) & m1);
> > + uword = (uword & m2) + ((uword>>2) & m2);
> > + uword = (uword & m3) + ((uword>>4) & m3);
> > + uword = (uword & m4) + ((uword>>8) & m4);
> > + return (uword & m5) + ((uword>>16) & m5); }
> > +
> > +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } }
> > +*/
> > +
> > +
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> > b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> > new file mode 100644
> > index 00000000000..5a12e6892aa
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> > @@ -0,0 +1,22 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target popcount } */
> > +/* { dg-require-effective-target int32plus } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +const unsigned m1 = 0x55555555UL;
> > +const unsigned m2 = 0x33333333UL;
> > +const unsigned m3 = 0x0F0F0F0FUL;
> > +const unsigned m4 = 0x0000003F;
> > +
> > +int pop32c(unsigned x) {
> > + x = x - ((x >> 1) & m1);
> > + x = (x & m2) + ((x >> 2) & m2);
> > + x = (x + (x >> 4)) & m3;
> > + x = x + (x >> 8);
> > + x = x + (x >> 16);
> > + return x & m4;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } }
> > +*/
> > +
> > +
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> > b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> > new file mode 100644
> > index 00000000000..4fb08d34984
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> > @@ -0,0 +1,22 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target popcount } */
> > +/* { dg-require-effective-target int32plus } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +const unsigned m1 = 0x55555555UL;
> > +const unsigned m2 = 0x33333333UL;
> > +const unsigned m3 = 0x0F0F0F0FUL;
> > +const unsigned m4 = 0x0000003F;
> > +
> > +int popc(unsigned x) {
> > + x = x - ((x >> 1) & m1);
> > + x = x - 3*((x >> 2) & m2);
> > + x = (x + (x >> 4)) & m3;
> > + x = x + (x >> 8);
> > + x = x + (x >> 16);
> > + return x & m4;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } }
> > +*/
> > +
> > +
> > --
> > 2.34.1
> >
>
> --
> Richard Biener <[email protected]>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG
> Nuernberg)
Updated patch:
gcc/ChangeLog:
* match.pd:
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/popcount7.c: New test.
* gcc.dg/tree-ssa/popcount7_2.c: New test.
* gcc.dg/tree-ssa/popcount8.c: New test.
* gcc.dg/tree-ssa/popcount9.c: New test.
>From 99ef45b9cd509e428ddb2e89c3784b86b1d92f5e Mon Sep 17 00:00:00 2001
From: Reshma Roy <[email protected]>
Date: Thu, 11 Dec 2025 10:57:53 +0530
Subject: [PATCH] Enabling POPCNT generation for 32-bit pattern from Hacker's
Delight
Pattern 1:
int Gia_WordCountOnes32c( uint32_t uword )
{
uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
return (uword & 0x0000ffff) + (uword>>16);
or
return (uword & 0x0000FFFF) + ((uword >> 16) & 0x0000FFFF);
}
Pattern 2:
int pop(unsigned x) {
x = x - ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
x = (x + (x >> 4)) & 0x0F0F0F0F;
x = x + (x >> 8);
x = x + (x >> 16);
return x & 0x0000003F;
}
Pattern 3:
int pop(unsigned x) {
x = x - ((x >> 1) & 0x55555555);
x = x - 3*((x >> 2) & 0x33333333)
x = (x + (x >> 4)) & 0x0F0F0F0F;
x = x + (x >> 8);
x = x + (x >> 16);
return x & 0x0000003F;
}
---
gcc/match.pd | 185 ++++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/popcount7.c | 23 +++
gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c | 23 +++
gcc/testsuite/gcc.dg/tree-ssa/popcount8.c | 22 +++
gcc/testsuite/gcc.dg/tree-ssa/popcount9.c | 22 +++
5 files changed, 275 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
diff --git a/gcc/match.pd b/gcc/match.pd
index bf410a75f5f..380ec0ee694 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10883,6 +10883,191 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(plus (CTZ:type (convert:utype @0)) { build_one_cst (type); }))))
#endif
+#if GIMPLE
+/* To recognize the popcnt pattern for 32-bit from Hacker's Delight
+ int Gia_WordCountOnes32c( uint32_t uword )
+ {
+ uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
+ uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
+ uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
+ uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
+ return (uword & 0x0000ffff) + (uword>>16);
+ or
+ return (uword & 0x0000ffff) + ((uword>>16) & 0x0000ffff);
+ }
+*/
+
+ (simplify
+ (plus:c
+ (bit_and @step_4 INTEGER_CST@9)
+ (rshift
+ (plus:c@step4
+ (bit_and @step3 INTEGER_CST@7)
+ (bit_and
+ (rshift
+ (plus:c@step3
+ (bit_and @step2 INTEGER_CST@5)
+ (bit_and
+ (rshift
+ (plus:c@step2
+ (bit_and @step1 INTEGER_CST@3)
+ (bit_and
+ (rshift
+ (plus:c@step1
+ (bit_and @0 INTEGER_CST@1)
+ (bit_and (rshift @0 INTEGER_CST@2) @1))
+ INTEGER_CST@4)
+ INTEGER_CST@3))
+ INTEGER_CST@6)
+ INTEGER_CST@5))
+ INTEGER_CST@8)
+ INTEGER_CST@7))
+ INTEGER_CST@10))
+ (with {
+ unsigned prec = TYPE_PRECISION (type);
+ int shift = prec & 31 ;
+ unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
+ unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
+ unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
+ unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x00FF00FF) >> shift;
+ unsigned HOST_WIDE_INT c5 = HOST_WIDE_INT_UC (0x0000FFFF) >> shift;
+ }
+ (if (prec >= 16
+ && prec <= 32
+ && pow2p_hwi (prec)
+ && TYPE_UNSIGNED (type)
+ && integer_onep (@2)
+ && wi::to_widest (@4) == 2
+ && wi::to_widest (@6) == 4
+ && wi::to_widest (@8) == 8
+ && wi::to_widest (@10) == 16
+ && tree_to_uhwi (@1) == c1
+ && tree_to_uhwi (@3) == c2
+ && tree_to_uhwi (@5) == c3
+ && tree_to_uhwi (@7) == c4
+ && tree_to_uhwi (@9) == c5)
+ (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
+ OPTIMIZE_FOR_BOTH))
+ (convert (IFN_POPCOUNT:type @0))))))
+#endif
+#if GIMPLE
+/*To recognize the popcnt pattern for 32-bit from Hacker's Delight
+int pop(unsigned x) {
+x = x - ((x >> 1) & 0x55555555);
+x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+x = x - 3*((x >> 2) & 0x33333333);
+x = (x + (x >> 4)) & 0x0F0F0F0F;
+x = x + (x >> 8);
+x = x + (x >> 16);
+return x & 0x0000003F
+}
+*/
+(simplify
+ (bit_and
+ (plus
+ (rshift @step4 INTEGER_CST@10)
+ (plus:c@step4
+ (rshift @step3 INTEGER_CST@8)
+ (bit_and@step3
+ (plus
+ (rshift @step2 INTEGER_CST@6)
+ (plus:c@step2
+ (bit_and @step1 INTEGER_CST@3)
+ (bit_and
+ (rshift
+ (minus@step1
+ @0
+ (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1))
+ INTEGER_CST@4)
+ INTEGER_CST@3)))
+ INTEGER_CST@5)))
+ INTEGER_CST@7)
+ (with {
+ unsigned prec = TYPE_PRECISION (type);
+ int shift = prec & 31 ;
+ unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
+ unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
+ unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
+ unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >> shift;
+ }
+ (if (prec >= 16
+ && prec <= 32
+ && pow2p_hwi (prec)
+ && TYPE_UNSIGNED (type)
+ && integer_onep (@2)
+ && wi::to_widest (@4) == 2
+ && wi::to_widest (@6) == 4
+ && wi::to_widest (@8) == 8
+ && wi::to_widest (@10) == 16
+ && tree_to_uhwi (@1) == c1
+ && tree_to_uhwi (@3) == c2
+ && tree_to_uhwi (@5) == c3
+ && tree_to_uhwi (@7) == c4)
+ (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
+ OPTIMIZE_FOR_BOTH))
+ (convert (IFN_POPCOUNT:type @0))))))
+#endif
+#if GIMPLE
+
+/*To recognize the popcnt pattern for 32-bit from Hacker's Delight
+/*int pop(unsigned x) {
+ x = x - ((x >> 1) & 0x55555555);
+ x = x - 3*((x >> 2) & 0x33333333)
+ x = (x + (x >> 4)) & 0x0F0F0F0F;
+ x = x + (x >> 8);
+ x = x + (x >> 16);
+ return x & 0x0000003F;
+}
+*/
+(simplify
+ (bit_and
+ (plus
+ (rshift @step4 INTEGER_CST@10)
+ (plus:c@step4
+ (rshift @step3 INTEGER_CST@8)
+ (bit_and@step3
+ (plus
+ (rshift @step2 INTEGER_CST@6)
+ (minus@step2
+ @step1
+ (mult:c
+ (bit_and
+ (rshift
+ (minus@step1
+ @0
+ (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1))
+ INTEGER_CST@4)
+ INTEGER_CST@3)
+ INTEGER_CST@11)))
+ INTEGER_CST@5)))
+ INTEGER_CST@7)
+ (with {
+ unsigned prec = TYPE_PRECISION (type);
+ int shift = prec & 31 ;
+ unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
+ unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
+ unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
+ unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >> shift;
+ }
+ (if (prec >= 16
+ && prec <= 32
+ && pow2p_hwi (prec)
+ && TYPE_UNSIGNED (type)
+ && integer_onep (@2)
+ && wi::to_widest (@4) == 2
+ && wi::to_widest (@6) == 4
+ && wi::to_widest (@8) == 8
+ && wi::to_widest (@10) == 16
+ && wi::to_widest (@11) == 3
+ && tree_to_uhwi (@1) == c1
+ && tree_to_uhwi (@3) == c2
+ && tree_to_uhwi (@5) == c3
+ && tree_to_uhwi (@7) == c4)
+ (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
+ OPTIMIZE_FOR_BOTH))
+ (convert (IFN_POPCOUNT:type @0))))))
+#endif
+
(for ffs (FFS)
/* __builtin_ffs (X) == 0 -> X == 0.
__builtin_ffs (X) == 6 -> (X & 63) == 32. */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
new file mode 100644
index 00000000000..c70837fc53b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcount } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned m1 = 0x55555555UL;
+const unsigned m2 = 0x33333333UL;
+const unsigned m3 = 0x0F0F0F0FUL;
+const unsigned m4 = 0x00FF00FFUL;
+const unsigned m5 = 0x0000FFFFUL;
+
+int Gia_WordCountOnes32c( unsigned uword )
+{
+ uword = (uword & m1) + ((uword>>1) & m1);
+ uword = (uword & m2) + ((uword>>2) & m2);
+ uword = (uword & m3) + ((uword>>4) & m3);
+ uword = (uword & m4) + ((uword>>8) & m4);
+ return (uword & m5) + (uword>>16);
+}
+
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
new file mode 100644
index 00000000000..fc6c23b411b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcount } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned m1 = 0x55555555UL;
+const unsigned m2 = 0x33333333UL;
+const unsigned m3 = 0x0F0F0F0FUL;
+const unsigned m4 = 0x00FF00FFUL;
+const unsigned m5 = 0x0000FFFFUL;
+
+int Gia_WordCountOnes32c( unsigned uword )
+{
+ uword = (uword & m1) + ((uword>>1) & m1);
+ uword = (uword & m2) + ((uword>>2) & m2);
+ uword = (uword & m3) + ((uword>>4) & m3);
+ uword = (uword & m4) + ((uword>>8) & m4);
+ return (uword & m5) + ((uword>>16) & m5);
+}
+
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
new file mode 100644
index 00000000000..5a12e6892aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcount } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned m1 = 0x55555555UL;
+const unsigned m2 = 0x33333333UL;
+const unsigned m3 = 0x0F0F0F0FUL;
+const unsigned m4 = 0x0000003F;
+
+int pop32c(unsigned x) {
+ x = x - ((x >> 1) & m1);
+ x = (x & m2) + ((x >> 2) & m2);
+ x = (x + (x >> 4)) & m3;
+ x = x + (x >> 8);
+ x = x + (x >> 16);
+ return x & m4;
+}
+
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
new file mode 100644
index 00000000000..4fb08d34984
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcount } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned m1 = 0x55555555UL;
+const unsigned m2 = 0x33333333UL;
+const unsigned m3 = 0x0F0F0F0FUL;
+const unsigned m4 = 0x0000003F;
+
+int popc(unsigned x) {
+ x = x - ((x >> 1) & m1);
+ x = x - 3*((x >> 2) & m2);
+ x = (x + (x >> 4)) & m3;
+ x = x + (x >> 8);
+ x = x + (x >> 16);
+ return x & m4;
+}
+
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+
+
--
2.34.1