[Public] Hi,
This patch add support for enabling POPCNT generation for 32-bit patterns from Hacker's delight. Bootstrapped and tested on x86. Thank You, Reshma Roy gcc/ChangeLog: * match.pd: gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/popcount7.c: New test. * gcc.dg/tree-ssa/popcount7_2.c: New test. * gcc.dg/tree-ssa/popcount8.c: New test. * gcc.dg/tree-ssa/popcount9.c: New test. >From 513656c2a431fc5e7215bc9977a084e06f2d6e4b Mon Sep 17 00:00:00 2001 From: Reshma Roy <[email protected]> Date: Thu, 11 Dec 2025 10:57:53 +0530 Subject: [PATCH] Enabling POPCNT generation for 32-bit pattern from Hacker's Delight Pattern 1: int Gia_WordCountOnes32c( uint32_t uword ) { uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555); uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333); uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f); uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff); return (uword & 0x0000ffff) + (uword>>16); or return (uword & 0x0000FFFF) + ((uword >> 16) & 0x0000FFFF); } Pattern 2: int pop(unsigned x) { x = x - ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); x = (x + (x >> 4)) & 0x0F0F0F0F; x = x + (x >> 8); x = x + (x >> 16); return x & 0x0000003F; } Pattern 3: int pop(unsigned x) { x = x - ((x >> 1) & 0x55555555); x = x - 3*((x >> 2) & 0x33333333) x = (x + (x >> 4)) & 0x0F0F0F0F; x = x + (x >> 8); x = x + (x >> 16); return x & 0x0000003F; } --- gcc/match.pd | 185 ++++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/popcount7.c | 23 +++ gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c | 23 +++ gcc/testsuite/gcc.dg/tree-ssa/popcount8.c | 22 +++ gcc/testsuite/gcc.dg/tree-ssa/popcount9.c | 22 +++ 5 files changed, 275 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount8.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount9.c diff --git a/gcc/match.pd b/gcc/match.pd index bf410a75f5f..380ec0ee694 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -10883,6 +10883,191 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (plus (CTZ:type (convert:utype @0)) { build_one_cst (type); })))) #endif +#if GIMPLE +/* To recognize the popcnt pattern for 32-bit from Hacker's Delight + int Gia_WordCountOnes32c( uint32_t uword ) + { + uword = (uword & 0x55555555) + ((uword>>1) & 0x55555555); + uword = (uword & 0x33333333) + ((uword>>2) & 0x33333333); + uword = (uword & 0x0f0f0f0f) + ((uword>>4) & 0x0f0f0f0f); + uword = (uword & 0x00ff00ff) + ((uword>>8) & 0x00ff00ff); + return (uword & 0x0000ffff) + (uword>>16); + or + return (uword & 0x0000ffff) + ((uword>>16) & 0x0000ffff); + } +*/ + + (simplify + (plus:c + (bit_and @step_4 INTEGER_CST@9) + (rshift + (plus:c@step4 + (bit_and @step3 INTEGER_CST@7) + (bit_and + (rshift + (plus:c@step3 + (bit_and @step2 INTEGER_CST@5) + (bit_and + (rshift + (plus:c@step2 + (bit_and @step1 INTEGER_CST@3) + (bit_and + (rshift + (plus:c@step1 + (bit_and @0 INTEGER_CST@1) + (bit_and (rshift @0 INTEGER_CST@2) @1)) + INTEGER_CST@4) + INTEGER_CST@3)) + INTEGER_CST@6) + INTEGER_CST@5)) + INTEGER_CST@8) + INTEGER_CST@7)) + INTEGER_CST@10)) + (with { + unsigned prec = TYPE_PRECISION (type); + int shift = prec & 31 ; + unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift; + unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift; + unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift; + unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x00FF00FF) >> shift; + unsigned HOST_WIDE_INT c5 = HOST_WIDE_INT_UC (0x0000FFFF) >> shift; + } + (if (prec >= 16 + && prec <= 32 + && pow2p_hwi (prec) + && TYPE_UNSIGNED (type) + && integer_onep (@2) + && wi::to_widest (@4) == 2 + && wi::to_widest (@6) == 4 + && wi::to_widest (@8) == 8 + && wi::to_widest (@10) == 16 + && tree_to_uhwi (@1) == c1 + && tree_to_uhwi (@3) == c2 + && tree_to_uhwi (@5) == c3 + && tree_to_uhwi (@7) == c4 + && tree_to_uhwi (@9) == c5) + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, + OPTIMIZE_FOR_BOTH)) + (convert (IFN_POPCOUNT:type @0)))))) +#endif +#if GIMPLE +/*To recognize the popcnt pattern for 32-bit from Hacker's Delight +int pop(unsigned x) { +x = x - ((x >> 1) & 0x55555555); +x = (x & 0x33333333) + ((x >> 2) & 0x33333333); +x = x - 3*((x >> 2) & 0x33333333); +x = (x + (x >> 4)) & 0x0F0F0F0F; +x = x + (x >> 8); +x = x + (x >> 16); +return x & 0x0000003F +} +*/ +(simplify + (bit_and + (plus + (rshift @step4 INTEGER_CST@10) + (plus:c@step4 + (rshift @step3 INTEGER_CST@8) + (bit_and@step3 + (plus + (rshift @step2 INTEGER_CST@6) + (plus:c@step2 + (bit_and @step1 INTEGER_CST@3) + (bit_and + (rshift + (minus@step1 + @0 + (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1)) + INTEGER_CST@4) + INTEGER_CST@3))) + INTEGER_CST@5))) + INTEGER_CST@7) + (with { + unsigned prec = TYPE_PRECISION (type); + int shift = prec & 31 ; + unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift; + unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift; + unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift; + unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >> shift; + } + (if (prec >= 16 + && prec <= 32 + && pow2p_hwi (prec) + && TYPE_UNSIGNED (type) + && integer_onep (@2) + && wi::to_widest (@4) == 2 + && wi::to_widest (@6) == 4 + && wi::to_widest (@8) == 8 + && wi::to_widest (@10) == 16 + && tree_to_uhwi (@1) == c1 + && tree_to_uhwi (@3) == c2 + && tree_to_uhwi (@5) == c3 + && tree_to_uhwi (@7) == c4) + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, + OPTIMIZE_FOR_BOTH)) + (convert (IFN_POPCOUNT:type @0)))))) +#endif +#if GIMPLE + +/*To recognize the popcnt pattern for 32-bit from Hacker's Delight +/*int pop(unsigned x) { + x = x - ((x >> 1) & 0x55555555); + x = x - 3*((x >> 2) & 0x33333333) + x = (x + (x >> 4)) & 0x0F0F0F0F; + x = x + (x >> 8); + x = x + (x >> 16); + return x & 0x0000003F; +} +*/ +(simplify + (bit_and + (plus + (rshift @step4 INTEGER_CST@10) + (plus:c@step4 + (rshift @step3 INTEGER_CST@8) + (bit_and@step3 + (plus + (rshift @step2 INTEGER_CST@6) + (minus@step2 + @step1 + (mult:c + (bit_and + (rshift + (minus@step1 + @0 + (bit_and (rshift @0 INTEGER_CST@2) INTEGER_CST@1)) + INTEGER_CST@4) + INTEGER_CST@3) + INTEGER_CST@11))) + INTEGER_CST@5))) + INTEGER_CST@7) + (with { + unsigned prec = TYPE_PRECISION (type); + int shift = prec & 31 ; + unsigned HOST_WIDE_INT c1 = HOST_WIDE_INT_UC (0x55555555) >> shift; + unsigned HOST_WIDE_INT c2 = HOST_WIDE_INT_UC (0x33333333) >> shift; + unsigned HOST_WIDE_INT c3 = HOST_WIDE_INT_UC (0x0F0F0F0F) >> shift; + unsigned HOST_WIDE_INT c4 = HOST_WIDE_INT_UC (0x0000003F) >> shift; + } + (if (prec >= 16 + && prec <= 32 + && pow2p_hwi (prec) + && TYPE_UNSIGNED (type) + && integer_onep (@2) + && wi::to_widest (@4) == 2 + && wi::to_widest (@6) == 4 + && wi::to_widest (@8) == 8 + && wi::to_widest (@10) == 16 + && wi::to_widest (@11) == 3 + && tree_to_uhwi (@1) == c1 + && tree_to_uhwi (@3) == c2 + && tree_to_uhwi (@5) == c3 + && tree_to_uhwi (@7) == c4) + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, + OPTIMIZE_FOR_BOTH)) + (convert (IFN_POPCOUNT:type @0)))))) +#endif + (for ffs (FFS) /* __builtin_ffs (X) == 0 -> X == 0. __builtin_ffs (X) == 6 -> (X & 63) == 32. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c new file mode 100644 index 00000000000..c70837fc53b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target popcount } */ +/* { dg-require-effective-target int32plus } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +const unsigned m1 = 0x55555555UL; +const unsigned m2 = 0x33333333UL; +const unsigned m3 = 0x0F0F0F0FUL; +const unsigned m4 = 0x00FF00FFUL; +const unsigned m5 = 0x0000FFFFUL; + +int Gia_WordCountOnes32c( unsigned uword ) +{ + uword = (uword & m1) + ((uword>>1) & m1); + uword = (uword & m2) + ((uword>>2) & m2); + uword = (uword & m3) + ((uword>>4) & m3); + uword = (uword & m4) + ((uword>>8) & m4); + return (uword & m5) + (uword>>16); +} + +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ + + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c new file mode 100644 index 00000000000..fc6c23b411b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount7_2.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target popcount } */ +/* { dg-require-effective-target int32plus } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +const unsigned m1 = 0x55555555UL; +const unsigned m2 = 0x33333333UL; +const unsigned m3 = 0x0F0F0F0FUL; +const unsigned m4 = 0x00FF00FFUL; +const unsigned m5 = 0x0000FFFFUL; + +int Gia_WordCountOnes32c( unsigned uword ) +{ + uword = (uword & m1) + ((uword>>1) & m1); + uword = (uword & m2) + ((uword>>2) & m2); + uword = (uword & m3) + ((uword>>4) & m3); + uword = (uword & m4) + ((uword>>8) & m4); + return (uword & m5) + ((uword>>16) & m5); +} + +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ + + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c new file mode 100644 index 00000000000..5a12e6892aa --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount8.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target popcount } */ +/* { dg-require-effective-target int32plus } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +const unsigned m1 = 0x55555555UL; +const unsigned m2 = 0x33333333UL; +const unsigned m3 = 0x0F0F0F0FUL; +const unsigned m4 = 0x0000003F; + +int pop32c(unsigned x) { + x = x - ((x >> 1) & m1); + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m3; + x = x + (x >> 8); + x = x + (x >> 16); + return x & m4; +} + +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ + + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c new file mode 100644 index 00000000000..4fb08d34984 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount9.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target popcount } */ +/* { dg-require-effective-target int32plus } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +const unsigned m1 = 0x55555555UL; +const unsigned m2 = 0x33333333UL; +const unsigned m3 = 0x0F0F0F0FUL; +const unsigned m4 = 0x0000003F; + +int popc(unsigned x) { + x = x - ((x >> 1) & m1); + x = x - 3*((x >> 2) & m2); + x = (x + (x >> 4)) & m3; + x = x + (x >> 8); + x = x + (x >> 16); + return x & m4; +} + +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ + + -- 2.34.1
