On Fri, 23 Jan 2026, Roy, Reshma wrote:

> [AMD Official Use Only - AMD Internal Distribution Only]
> 
> > -----Original Message-----
> > From: Richard Biener <[email protected]>
> > Sent: Friday, January 23, 2026 1:30 PM
> > To: Roy, Reshma <[email protected]>
> > Cc: [email protected]; Kumar, Venkataramanan
> > <[email protected]>
> > Subject: Re: [PATCH] Enabling POPCNT generation for 32-bit pattern from 
> > Hacker's
> > Delight
> >
> > Caution: This message originated from an External Source. Use proper caution
> > when opening attachments, clicking links, or responding.
> >
> >
> > On Fri, 23 Jan 2026, Roy, Reshma wrote:
> >
> > > [Public]
> > >
> > > Hi,
> > >
> > > This patch adds support for enabling POPCNT generation for 32-bit patterns
> > > from Hacker's Delight.
> > >
> > > Bootstrapped and tested on x86.
> > >
> > > Thank You,
> > >
> > > Reshma Roy
> > >
> > > gcc/ChangeLog:
> > >
> > > * match.pd:
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.dg/tree-ssa/popcount7.c: New test.
> > > * gcc.dg/tree-ssa/popcount7_2.c: New test.
> > > * gcc.dg/tree-ssa/popcount8.c: New test.
> > > * gcc.dg/tree-ssa/popcount9.c: New test.
> > >
> > >
> > > From 513656c2a431fc5e7215bc9977a084e06f2d6e4b Mon Sep 17 00:00:00 2001
> > > From: Reshma Roy <[email protected]>
> > > Date: Thu, 11 Dec 2025 10:57:53 +0530
> > > Subject: [PATCH] Enabling POPCNT generation for 32-bit pattern from
> > > Hacker's Delight
> > >
> > > Pattern 1:
> > > int Gia_WordCountOnes32c( uint32_t uword ) {
> > >   uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
> > >   uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
> > >   uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
> > >   uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
> > >   return  (uword & 0x0000ffff) + (uword>>16);
> > >   or
> > >   return (uword & 0x0000FFFF) + ((uword >> 16) & 0x0000FFFF); }
> > >
> > > Pattern 2:
> > > int pop(unsigned x) {
> > >   x = x - ((x >> 1) & 0x55555555);
> > >   x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
> > >   x = (x + (x >> 4)) & 0x0F0F0F0F;
> > >   x = x + (x >> 8);
> > >   x = x + (x >> 16);
> > >   return x & 0x0000003F;
> > > }
> > >
> > > Pattern 3:
> > > int pop(unsigned x) {
> > >   x = x - ((x >> 1) & 0x55555555);
> > >   x = x - 3*((x >> 2) & 0x33333333)
> > >     x = (x + (x >> 4)) & 0x0F0F0F0F;
> > >   x = x + (x >> 8);
> > >   x = x + (x >> 16);
> > >   return x & 0x0000003F;
> > > }
> > > ---
> > >  gcc/match.pd                                | 185 ++++++++++++++++++++
> > >  gcc/testsuite/gcc.dg/tree-ssa/popcount7.c   |  23 +++
> > >  gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c |  23 +++
> > >  gcc/testsuite/gcc.dg/tree-ssa/popcount8.c   |  22 +++
> > >  gcc/testsuite/gcc.dg/tree-ssa/popcount9.c   |  22 +++
> > >  5 files changed, 275 insertions(+)
> > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd index
> > > bf410a75f5f..380ec0ee694 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -10883,6 +10883,191 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >     (plus (CTZ:type (convert:utype @0)) { build_one_cst (type); }))))
> > > #endif
> > >
> > > +#if GIMPLE
> > > +/* To recognize the popcnt pattern for 32-bit from Hacker's Delight
> > > +   int Gia_WordCountOnes32c( uint32_t uword )
> > > +   {
> > > +   uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
> > > +   uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
> > > +   uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
> > > +   uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
> > > +   return  (uword & 0x0000ffff) + (uword>>16);
> > > +   or
> > > +   return  (uword & 0x0000ffff) + ((uword>>16) & 0x0000ffff);
> > > +   }
> > > +*/
> > > +
> > > +  (simplify
> > > +     (plus:c
> > > +       (bit_and @step_4 INTEGER_CST@9)
> > > +       (rshift
> > > +         (plus:c@step4
> > > +            (bit_and @step3 INTEGER_CST@7)
> > > +         (bit_and
> > > +           (rshift
> > > +             (plus:c@step3
> > > +               (bit_and @step2 INTEGER_CST@5)
> > > +               (bit_and
> > > +                 (rshift
> > > +                   (plus:c@step2
> > > +                     (bit_and @step1 INTEGER_CST@3)
> > > +                     (bit_and
> > > +                       (rshift
> > > +                         (plus:c@step1
> > > +                            (bit_and @0 INTEGER_CST@1)
> > > +                            (bit_and (rshift @0 INTEGER_CST@2) @1))
> > > +                         INTEGER_CST@4)
> > > +                       INTEGER_CST@3))
> > > +                    INTEGER_CST@6)
> > > +                   INTEGER_CST@5))
> > > +               INTEGER_CST@8)
> > > +             INTEGER_CST@7))
> > > +           INTEGER_CST@10))
> > > +   (with {
> > > +    unsigned prec = TYPE_PRECISION (type);
> > > +    int shift = prec & 31 ;
> > > +    unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >>
> > shift;
> > > +    unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >>
> > shift;
> > > +    unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >>
> > shift;
> > > +    unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x00FF00FF) >>
> > shift;
> > > +    unsigned HOST_WIDE_INT c5 = HOST_WIDE_INT_UC (0x0000FFFF) >>
> > shift;
> > > +    }
> > > +    (if (prec >= 16
> > > +         && prec <= 32
> > > +         && pow2p_hwi (prec)
> > > +         && TYPE_UNSIGNED (type)
> > > +         && integer_onep (@2)
> > > +         && wi::to_widest (@4) == 2
> > > +         && wi::to_widest (@6) == 4
> > > +         && wi::to_widest (@8) == 8
> > > +         && wi::to_widest (@10) == 16
> > > +         && tree_to_uhwi (@1) == c1
> > > +         && tree_to_uhwi (@3) == c2
> > > +         && tree_to_uhwi (@5) == c3
> > > +         && tree_to_uhwi (@7) == c4
> > > +         && tree_to_uhwi (@9) == c5)
> > > +       (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> > > +                                            OPTIMIZE_FOR_BOTH))
> > > +        (convert (IFN_POPCOUNT:type @0)))))) #endif #if GIMPLE /*To
> > > +recognize the popcnt pattern for 32-bit from Hacker's Delight int
> > > +pop(unsigned x) { x = x - ((x >> 1) & 0x55555555); x = (x &
> > > +0x33333333) + ((x >> 2) & 0x33333333); x = x - 3*((x >> 2) &
> > > +0x33333333); x = (x + (x >> 4)) & 0x0F0F0F0F; x = x + (x >> 8); x = x
> > > ++ (x >> 16); return x & 0x0000003F
> >
> > Oddly enough, the existing
> >
> >    int popcount32c (uint32_t x)
> >    {
> >      x -= (x >> 1) & 0x55555555;
> >      x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
> >      x = (x + (x >> 4)) & 0x0f0f0f0f;
> >      return (x * 0x01010101) >> 24;
> >
> > looks a lot simpler?
> 
> Yes, it is simpler. I think the new pattern, even though it is more complex
> than the existing one, ensures GCC recognizes more variations of manual
> bit-counting.
> >
> > Please put the new patterns next to the existing one, before the FFS 
> > simplification.
> 
> The changes were made and the updated patch is provided in the mail.
> >
> > Otherwise this looks like a straight-forward addition of another popcount 
> > form, but it
> > has to wait for stage1 to land.
> 
> Noted.
> >
> > I do wonder whether it's possible to use the CRC stmt-simulation 
> > infrastructure to
> > detect more variants of the bit inspection builtins without enumerating 
> > them in
> > match.pd.
> 
> I will explore the possibility of using the CRC stmt-simulation 
> infrastructure. Meanwhile, can we
> introduce the patterns into match.pd now, and later migrate the patterns into 
> CRC infrastructure,
> including the newly added ones, without losing them?

Sure.  As said, the patch has to wait for stage1 anyway; I will have a
more detailed look at that point.  I notice the ChangeLog entry for the
match.pd change looks incomplete.  I suggest something like

        * match.pd: Add new popcount pattern variants.

> >
> > Thanks,
> > Richard.
> >
> > > +}
> > > +*/
> > > +(simplify
> > > + (bit_and
> > > +   (plus
> > > +    (rshift @step4 INTEGER_CST@10)
> > > +     (plus:c@step4
> > > +        (rshift @step3 INTEGER_CST@8)
> > > +         (bit_and@step3
> > > +          (plus
> > > +              (rshift @step2 INTEGER_CST@6)
> > > +                (plus:c@step2
> > > +                  (bit_and @step1 INTEGER_CST@3)
> > > +                  (bit_and
> > > +                    (rshift
> > > +                      (minus@step1
> > > +                          @0
> > > +                          (bit_and (rshift @0 INTEGER_CST@2) 
> > > INTEGER_CST@1))
> > > +                      INTEGER_CST@4)
> > > +                    INTEGER_CST@3)))
> > > +            INTEGER_CST@5)))
> > > +    INTEGER_CST@7)
> > > + (with {
> > > +   unsigned prec = TYPE_PRECISION (type);
> > > +   int shift = prec & 31 ;
> > > +   unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >>
> > shift;
> > > +   unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >>
> > shift;
> > > +   unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >>
> > shift;
> > > +   unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >>
> > shift;
> > > +   }
> > > +   (if (prec >= 16
> > > +        && prec <= 32
> > > +        && pow2p_hwi (prec)
> > > +        && TYPE_UNSIGNED (type)
> > > +        && integer_onep (@2)
> > > +        && wi::to_widest (@4) == 2
> > > +        && wi::to_widest (@6) == 4
> > > +        && wi::to_widest (@8) == 8
> > > +        && wi::to_widest (@10) == 16
> > > +        && tree_to_uhwi (@1) == c1
> > > +        && tree_to_uhwi (@3) == c2
> > > +        && tree_to_uhwi (@5) == c3
> > > +        && tree_to_uhwi (@7) == c4)
> > > +     (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> > > +                                            OPTIMIZE_FOR_BOTH))
> > > +        (convert (IFN_POPCOUNT:type @0)))))) #endif #if GIMPLE
> > > +
> > > +/*To recognize the popcnt pattern for 32-bit from Hacker's Delight
> > > +/*int pop(unsigned x) {
> > > +  x = x - ((x >> 1) & 0x55555555);
> > > +  x = x - 3*((x >> 2) & 0x33333333)
> > > +  x = (x + (x >> 4)) & 0x0F0F0F0F;
> > > +  x = x + (x >> 8);
> > > +  x = x + (x >> 16);
> > > +  return x & 0x0000003F;
> > > +}
> > > +*/
> > > +(simplify
> > > + (bit_and
> > > +   (plus
> > > +    (rshift @step4 INTEGER_CST@10)
> > > +     (plus:c@step4
> > > +        (rshift @step3 INTEGER_CST@8)
> > > +         (bit_and@step3
> > > +          (plus
> > > +              (rshift @step2 INTEGER_CST@6)
> > > +              (minus@step2
> > > +                 @step1
> > > +                 (mult:c
> > > +                  (bit_and
> > > +                   (rshift
> > > +                    (minus@step1
> > > +                       @0
> > > +                       (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1))
> > > +                    INTEGER_CST@4)
> > > +                  INTEGER_CST@3)
> > > +                 INTEGER_CST@11)))
> > > +            INTEGER_CST@5)))
> > > +    INTEGER_CST@7)
> > > + (with {
> > > +   unsigned prec = TYPE_PRECISION (type);
> > > +   int shift = prec & 31 ;
> > > +   unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >>
> > shift;
> > > +   unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >>
> > shift;
> > > +   unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >>
> > shift;
> > > +   unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >>
> > shift;
> > > +   }
> > > +   (if (prec >= 16
> > > +        && prec <= 32
> > > +        && pow2p_hwi (prec)
> > > +        && TYPE_UNSIGNED (type)
> > > +        && integer_onep (@2)
> > > +        && wi::to_widest (@4) == 2
> > > +        && wi::to_widest (@6) == 4
> > > +        && wi::to_widest (@8) == 8
> > > +        && wi::to_widest (@10) == 16
> > > +        && wi::to_widest (@11) == 3
> > > +        && tree_to_uhwi (@1) == c1
> > > +        && tree_to_uhwi (@3) == c2
> > > +        && tree_to_uhwi (@5) == c3
> > > +        && tree_to_uhwi (@7) == c4)
> > > +     (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> > > +                                            OPTIMIZE_FOR_BOTH))
> > > +        (convert (IFN_POPCOUNT:type @0)))))) #endif
> > > +
> > >  (for ffs (FFS)
> > >   /* __builtin_ffs (X) == 0 -> X == 0.
> > >      __builtin_ffs (X) == 6 -> (X & 63) == 32.  */ diff --git
> > > a/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> > > b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> > > new file mode 100644
> > > index 00000000000..c70837fc53b
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> > > @@ -0,0 +1,23 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-require-effective-target popcount } */
> > > +/* { dg-require-effective-target int32plus } */
> > > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > > +
> > > +const unsigned m1  = 0x55555555UL;
> > > +const unsigned m2  = 0x33333333UL;
> > > +const unsigned m3  = 0x0F0F0F0FUL;
> > > +const unsigned m4  = 0x00FF00FFUL;
> > > +const unsigned m5  = 0x0000FFFFUL;
> > > +
> > > +int Gia_WordCountOnes32c( unsigned uword ) {
> > > +  uword = (uword & m1) + ((uword>>1) & m1);
> > > +  uword = (uword & m2) + ((uword>>2) & m2);
> > > +  uword = (uword & m3) + ((uword>>4) & m3);
> > > +  uword = (uword & m4) + ((uword>>8) & m4);
> > > +  return  (uword & m5) + (uword>>16); }
> > > +
> > > +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } }
> > > +*/
> > > +
> > > +
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> > > b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> > > new file mode 100644
> > > index 00000000000..fc6c23b411b
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> > > @@ -0,0 +1,23 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-require-effective-target popcount } */
> > > +/* { dg-require-effective-target int32plus } */
> > > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > > +
> > > +const unsigned m1  = 0x55555555UL;
> > > +const unsigned m2  = 0x33333333UL;
> > > +const unsigned m3  = 0x0F0F0F0FUL;
> > > +const unsigned m4  = 0x00FF00FFUL;
> > > +const unsigned m5  = 0x0000FFFFUL;
> > > +
> > > +int Gia_WordCountOnes32c( unsigned uword ) {
> > > +  uword = (uword & m1) + ((uword>>1) & m1);
> > > +  uword = (uword & m2) + ((uword>>2) & m2);
> > > +  uword = (uword & m3) + ((uword>>4) & m3);
> > > +  uword = (uword & m4) + ((uword>>8) & m4);
> > > +  return  (uword & m5) + ((uword>>16) & m5); }
> > > +
> > > +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } }
> > > +*/
> > > +
> > > +
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> > > b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> > > new file mode 100644
> > > index 00000000000..5a12e6892aa
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> > > @@ -0,0 +1,22 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-require-effective-target popcount } */
> > > +/* { dg-require-effective-target int32plus } */
> > > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > > +
> > > +const unsigned m1  = 0x55555555UL;
> > > +const unsigned m2  = 0x33333333UL;
> > > +const unsigned m3  = 0x0F0F0F0FUL;
> > > +const unsigned m4  = 0x0000003F;
> > > +
> > > +int pop32c(unsigned x) {
> > > +  x = x - ((x >> 1) & m1);
> > > +  x = (x & m2) + ((x >> 2) & m2);
> > > +  x = (x + (x >> 4)) & m3;
> > > +  x = x + (x >> 8);
> > > +  x = x + (x >> 16);
> > > +  return x & m4;
> > > +}
> > > +
> > > +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } }
> > > +*/
> > > +
> > > +
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> > > b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> > > new file mode 100644
> > > index 00000000000..4fb08d34984
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> > > @@ -0,0 +1,22 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-require-effective-target popcount } */
> > > +/* { dg-require-effective-target int32plus } */
> > > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > > +
> > > +const unsigned m1  = 0x55555555UL;
> > > +const unsigned m2  = 0x33333333UL;
> > > +const unsigned m3  = 0x0F0F0F0FUL;
> > > +const unsigned m4  = 0x0000003F;
> > > +
> > > +int popc(unsigned x) {
> > > +  x = x - ((x >> 1) & m1);
> > > +  x = x - 3*((x >> 2) & m2);
> > > +  x = (x + (x >> 4)) & m3;
> > > +  x = x + (x >> 8);
> > > +  x = x + (x >> 16);
> > > +  return x & m4;
> > > +}
> > > +
> > > +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } }
> > > +*/
> > > +
> > > +
> > > --
> > > 2.34.1
> > >
> >
> > --
> > Richard Biener <[email protected]>
> > SUSE Software Solutions Germany GmbH,
> > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG
> > Nuernberg)
> 
> Updated patch:
> 
> gcc/ChangeLog:
> 
> * match.pd:
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.dg/tree-ssa/popcount7.c: New test.
> * gcc.dg/tree-ssa/popcount7_2.c: New test.
> * gcc.dg/tree-ssa/popcount8.c: New test.
> * gcc.dg/tree-ssa/popcount9.c: New test.
> 
> From 99ef45b9cd509e428ddb2e89c3784b86b1d92f5e Mon Sep 17 00:00:00 2001
> From: Reshma Roy <[email protected]>
> Date: Thu, 11 Dec 2025 10:57:53 +0530
> Subject: [PATCH] Enabling POPCNT generation for 32-bit pattern from Hacker's
>  Delight
> 
> Pattern 1:
> int Gia_WordCountOnes32c( uint32_t uword )
> {
>   uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
>   uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
>   uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
>   uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
>   return  (uword & 0x0000ffff) + (uword>>16);
>   or
>   return (uword & 0x0000FFFF) + ((uword >> 16) & 0x0000FFFF);
> }
> 
> Pattern 2:
> int pop(unsigned x) {
>   x = x - ((x >> 1) & 0x55555555);
>   x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
>   x = (x + (x >> 4)) & 0x0F0F0F0F;
>   x = x + (x >> 8);
>   x = x + (x >> 16);
>   return x & 0x0000003F;
> }
> 
> Pattern 3:
> int pop(unsigned x) {
>   x = x - ((x >> 1) & 0x55555555);
>   x = x - 3*((x >> 2) & 0x33333333)
>     x = (x + (x >> 4)) & 0x0F0F0F0F;
>   x = x + (x >> 8);
>   x = x + (x >> 16);
>   return x & 0x0000003F;
> }
> ---
>  gcc/match.pd                                | 185 ++++++++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/popcount7.c   |  23 +++
>  gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c |  23 +++
>  gcc/testsuite/gcc.dg/tree-ssa/popcount8.c   |  22 +++
>  gcc/testsuite/gcc.dg/tree-ssa/popcount9.c   |  22 +++
>  5 files changed, 275 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index bf410a75f5f..380ec0ee694 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -10883,6 +10883,191 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     (plus (CTZ:type (convert:utype @0)) { build_one_cst (type); }))))
>  #endif
> 
> +#if GIMPLE
> +/* To recognize the popcnt pattern for 32-bit from Hacker's Delight
> +   int Gia_WordCountOnes32c( uint32_t uword )
> +   {
> +   uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555);
> +   uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333);
> +   uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f);
> +   uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff);
> +   return  (uword & 0x0000ffff) + (uword>>16);
> +   or
> +   return  (uword & 0x0000ffff) + ((uword>>16) & 0x0000ffff);
> +   }
> +*/
> +
> +  (simplify
> +     (plus:c
> +       (bit_and @step_4 INTEGER_CST@9)
> +       (rshift
> +         (plus:c@step4
> +            (bit_and @step3 INTEGER_CST@7)
> +         (bit_and
> +           (rshift
> +             (plus:c@step3
> +               (bit_and @step2 INTEGER_CST@5)
> +               (bit_and
> +                 (rshift
> +                   (plus:c@step2
> +                     (bit_and @step1 INTEGER_CST@3)
> +                     (bit_and
> +                       (rshift
> +                         (plus:c@step1
> +                            (bit_and @0 INTEGER_CST@1)
> +                            (bit_and (rshift @0 INTEGER_CST@2) @1))
> +                         INTEGER_CST@4)
> +                       INTEGER_CST@3))
> +                    INTEGER_CST@6)
> +                   INTEGER_CST@5))
> +               INTEGER_CST@8)
> +             INTEGER_CST@7))
> +           INTEGER_CST@10))
> +   (with {
> +    unsigned prec = TYPE_PRECISION (type);
> +    int shift = prec & 31 ;
> +    unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
> +    unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
> +    unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
> +    unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x00FF00FF) >> shift;
> +    unsigned HOST_WIDE_INT c5 = HOST_WIDE_INT_UC (0x0000FFFF) >> shift;
> +    }
> +    (if (prec >= 16
> +         && prec <= 32
> +         && pow2p_hwi (prec)
> +         && TYPE_UNSIGNED (type)
> +         && integer_onep (@2)
> +         && wi::to_widest (@4) == 2
> +         && wi::to_widest (@6) == 4
> +         && wi::to_widest (@8) == 8
> +         && wi::to_widest (@10) == 16
> +         && tree_to_uhwi (@1) == c1
> +         && tree_to_uhwi (@3) == c2
> +         && tree_to_uhwi (@5) == c3
> +         && tree_to_uhwi (@7) == c4
> +         && tree_to_uhwi (@9) == c5)
> +       (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> +                                            OPTIMIZE_FOR_BOTH))
> +        (convert (IFN_POPCOUNT:type @0))))))
> +#endif
> +#if GIMPLE
> +/*To recognize the popcnt pattern for 32-bit from Hacker's Delight
> +int pop(unsigned x) {
> +x = x - ((x >> 1) & 0x55555555);
> +x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
> +x = x - 3*((x >> 2) & 0x33333333);
> +x = (x + (x >> 4)) & 0x0F0F0F0F;
> +x = x + (x >> 8);
> +x = x + (x >> 16);
> +return x & 0x0000003F
> +}
> +*/
> +(simplify
> + (bit_and
> +   (plus
> +    (rshift @step4 INTEGER_CST@10)
> +     (plus:c@step4
> +        (rshift @step3 INTEGER_CST@8)
> +         (bit_and@step3
> +          (plus
> +              (rshift @step2 INTEGER_CST@6)
> +                (plus:c@step2
> +                  (bit_and @step1 INTEGER_CST@3)
> +                  (bit_and
> +                    (rshift
> +                      (minus@step1
> +                          @0
> +                          (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1))
> +                      INTEGER_CST@4)
> +                    INTEGER_CST@3)))
> +            INTEGER_CST@5)))
> +    INTEGER_CST@7)
> + (with {
> +   unsigned prec = TYPE_PRECISION (type);
> +   int shift = prec & 31 ;
> +   unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
> +   unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
> +   unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
> +   unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >> shift;
> +   }
> +   (if (prec >= 16
> +        && prec <= 32
> +        && pow2p_hwi (prec)
> +        && TYPE_UNSIGNED (type)
> +        && integer_onep (@2)
> +        && wi::to_widest (@4) == 2
> +        && wi::to_widest (@6) == 4
> +        && wi::to_widest (@8) == 8
> +        && wi::to_widest (@10) == 16
> +        && tree_to_uhwi (@1) == c1
> +        && tree_to_uhwi (@3) == c2
> +        && tree_to_uhwi (@5) == c3
> +        && tree_to_uhwi (@7) == c4)
> +     (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> +                                            OPTIMIZE_FOR_BOTH))
> +        (convert (IFN_POPCOUNT:type @0))))))
> +#endif
> +#if GIMPLE
> +
> +/*To recognize the popcnt pattern for 32-bit from Hacker's Delight
> +/*int pop(unsigned x) {
> +  x = x - ((x >> 1) & 0x55555555);
> +  x = x - 3*((x >> 2) & 0x33333333)
> +  x = (x + (x >> 4)) & 0x0F0F0F0F;
> +  x = x + (x >> 8);
> +  x = x + (x >> 16);
> +  return x & 0x0000003F;
> +}
> +*/
> +(simplify
> + (bit_and
> +   (plus
> +    (rshift @step4 INTEGER_CST@10)
> +     (plus:c@step4
> +        (rshift @step3 INTEGER_CST@8)
> +         (bit_and@step3
> +          (plus
> +              (rshift @step2 INTEGER_CST@6)
> +              (minus@step2
> +                 @step1
> +                 (mult:c
> +                  (bit_and
> +                   (rshift
> +                    (minus@step1
> +                       @0
> +                       (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1))
> +                    INTEGER_CST@4)
> +                  INTEGER_CST@3)
> +                 INTEGER_CST@11)))
> +            INTEGER_CST@5)))
> +    INTEGER_CST@7)
> + (with {
> +   unsigned prec = TYPE_PRECISION (type);
> +   int shift = prec & 31 ;
> +   unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift;
> +   unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift;
> +   unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift;
> +   unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >> shift;
> +   }
> +   (if (prec >= 16
> +        && prec <= 32
> +        && pow2p_hwi (prec)
> +        && TYPE_UNSIGNED (type)
> +        && integer_onep (@2)
> +        && wi::to_widest (@4) == 2
> +        && wi::to_widest (@6) == 4
> +        && wi::to_widest (@8) == 8
> +        && wi::to_widest (@10) == 16
> +        && wi::to_widest (@11) == 3
> +        && tree_to_uhwi (@1) == c1
> +        && tree_to_uhwi (@3) == c2
> +        && tree_to_uhwi (@5) == c3
> +        && tree_to_uhwi (@7) == c4)
> +     (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
> +                                            OPTIMIZE_FOR_BOTH))
> +        (convert (IFN_POPCOUNT:type @0))))))
> +#endif
> +
>  (for ffs (FFS)
>   /* __builtin_ffs (X) == 0 -> X == 0.
>      __builtin_ffs (X) == 6 -> (X & 63) == 32.  */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> new file mode 100644
> index 00000000000..c70837fc53b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target popcount } */
> +/* { dg-require-effective-target int32plus } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +const unsigned m1  = 0x55555555UL;
> +const unsigned m2  = 0x33333333UL;
> +const unsigned m3  = 0x0F0F0F0FUL;
> +const unsigned m4  = 0x00FF00FFUL;
> +const unsigned m5  = 0x0000FFFFUL;
> +
> +int Gia_WordCountOnes32c( unsigned uword )
> +{
> +  uword = (uword & m1) + ((uword>>1) & m1);
> +  uword = (uword & m2) + ((uword>>2) & m2);
> +  uword = (uword & m3) + ((uword>>4) & m3);
> +  uword = (uword & m4) + ((uword>>8) & m4);
> +  return  (uword & m5) + (uword>>16);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> new file mode 100644
> index 00000000000..fc6c23b411b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target popcount } */
> +/* { dg-require-effective-target int32plus } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +const unsigned m1  = 0x55555555UL;
> +const unsigned m2  = 0x33333333UL;
> +const unsigned m3  = 0x0F0F0F0FUL;
> +const unsigned m4  = 0x00FF00FFUL;
> +const unsigned m5  = 0x0000FFFFUL;
> +
> +int Gia_WordCountOnes32c( unsigned uword )
> +{
> +  uword = (uword & m1) + ((uword>>1) & m1);
> +  uword = (uword & m2) + ((uword>>2) & m2);
> +  uword = (uword & m3) + ((uword>>4) & m3);
> +  uword = (uword & m4) + ((uword>>8) & m4);
> +  return  (uword & m5) + ((uword>>16) & m5);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> new file mode 100644
> index 00000000000..5a12e6892aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target popcount } */
> +/* { dg-require-effective-target int32plus } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +const unsigned m1  = 0x55555555UL;
> +const unsigned m2  = 0x33333333UL;
> +const unsigned m3  = 0x0F0F0F0FUL;
> +const unsigned m4  = 0x0000003F;
> +
> +int pop32c(unsigned x) {
> +  x = x - ((x >> 1) & m1);
> +  x = (x & m2) + ((x >> 2) & m2);
> +  x = (x + (x >> 4)) & m3;
> +  x = x + (x >> 8);
> +  x = x + (x >> 16);
> +  return x & m4;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> new file mode 100644
> index 00000000000..4fb08d34984
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target popcount } */
> +/* { dg-require-effective-target int32plus } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +const unsigned m1  = 0x55555555UL;
> +const unsigned m2  = 0x33333333UL;
> +const unsigned m3  = 0x0F0F0F0FUL;
> +const unsigned m4  = 0x0000003F;
> +
> +int popc(unsigned x) {
> +  x = x - ((x >> 1) & m1);
> +  x = x - 3*((x >> 2) & m2);
> +  x = (x + (x >> 4)) & m3;
> +  x = x + (x >> 8);
> +  x = x + (x >> 16);
> +  return x & m4;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
> +
> +
> --
> 2.34.1
> 

-- 
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to