This patch adds matching for Hamming weight (popcount) implementation. The following sources:
int foo64 (unsigned long long a) { unsigned long long b = a; b -= ((b>>1) & 0x5555555555555555ULL); b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL; b *= 0x0101010101010101ULL; return (int)(b >> 56); } and int foo32 (unsigned int a) { unsigned long b = a; b -= ((b>>1) & 0x55555555UL); b = ((b>>2) & 0x33333333UL) + (b & 0x33333333UL); b = ((b>>4) + b) & 0x0F0F0F0FUL; b *= 0x01010101UL; return (int)(b >> 24); } and equivalents are now recognized as popcount for platforms with hw popcount support. Bootstrapped and tested on x86_64-pc-linux-gnu and aarch64-linux-gnu systems with no regressions. (I have no write access to repo) Thanks, Dmitrij gcc/ChangeLog: PR tree-optimization/90836 * gcc/match.pd (popcount): New pattern. gcc/testsuite/ChangeLog: PR tree-optimization/90836 * lib/target-supports.exp (check_effective_target_popcount) (check_effective_target_popcountll): New effective targets. * gcc.dg/tree-ssa/popcount4.c: New test. * gcc.dg/tree-ssa/popcount4l.c: New test. * gcc.dg/tree-ssa/popcount4ll.c: New test.
diff --git a/gcc/match.pd b/gcc/match.pd index 0317bc7..b1867bf 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5358,6 +5358,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cmp (popcount @0) integer_zerop) (rep @0 { build_zero_cst (TREE_TYPE (@0)); })))) +/* 64- and 32-bits branchless implementations of popcount are detected: + + int popcount64c (uint64_t x) + { + x -= (x >> 1) & 0x5555555555555555ULL; + x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); + x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL; + return (x * 0x0101010101010101ULL) >> 56; + } + + int popcount32c (uint32_t x) + { + x -= (x >> 1) & 0x55555555; + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x + (x >> 4)) & 0x0f0f0f0f; + return (x * 0x01010101) >> 24; + } */ +(simplify + (convert + (rshift + (mult + (bit_and:c + (plus:c + (rshift @8 INTEGER_CST@5) + (plus:c@8 + (bit_and @6 INTEGER_CST@7) + (bit_and + (rshift + (minus@6 + @0 + (bit_and + (rshift @0 INTEGER_CST@4) + INTEGER_CST@11)) + INTEGER_CST@10) + INTEGER_CST@9))) + INTEGER_CST@3) + INTEGER_CST@2) + INTEGER_CST@1)) + /* Check constants and optab. */ + (with + { + tree argtype = TREE_TYPE (@0); + unsigned prec = TYPE_PRECISION (argtype); + int shift = TYPE_PRECISION (long_long_unsigned_type_node) - prec; + const unsigned long long c1 = 0x0101010101010101ULL >> shift, + c2 = 0x0F0F0F0F0F0F0F0FULL >> shift, + c3 = 0x3333333333333333ULL >> shift, + c4 = 0x5555555555555555ULL >> shift; + } + (if (types_match (type, integer_type_node) && tree_to_uhwi (@4) == 1 + && tree_to_uhwi (@10) == 2 && tree_to_uhwi (@5) == 4 + && tree_to_uhwi (@1) == prec - 8 && tree_to_uhwi (@2) == c1 + && tree_to_uhwi (@3) == c2 && tree_to_uhwi (@9) == c3 + && tree_to_uhwi (@7) == c3 && tree_to_uhwi (@11) == c4 + && optab_handler (popcount_optab, TYPE_MODE (argtype)) + != CODE_FOR_nothing) + (switch + (if (types_match (argtype, long_long_unsigned_type_node)) + (BUILT_IN_POPCOUNTLL @0)) + (if (types_match (argtype, long_unsigned_type_node)) + (BUILT_IN_POPCOUNTL @0)) + (if (types_match (argtype, unsigned_type_node)) + (BUILT_IN_POPCOUNT @0)))))) + /* Simplify: a = a1 op a2 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c new file mode 100644 index 0000000..9f759f8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target popcount } */ +/* { dg-require-effective-target int32plus } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +const unsigned m1 = 0x55555555UL; +const unsigned m2 = 0x33333333UL; +const unsigned m4 = 0x0F0F0F0FUL; +const unsigned h01 = 0x01010101UL; +const int shift = 24; + +int popcount64c(unsigned x) +{ + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + return (x * h01) >> shift; +} + +/* { dg-final { scan-tree-dump-times "__builtin_popcount" 1 "optimized" } } */ + + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c new file mode 100644 index 0000000..ab33f79 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target popcountl } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +#if __SIZEOF_LONG__ == 4 +const unsigned long m1 = 0x55555555UL; +const unsigned long m2 = 0x33333333UL; +const unsigned long m4 = 0x0F0F0F0FUL; +const unsigned long h01 = 0x01010101UL; +const int shift = 24; +#else +const unsigned long m1 = 0x5555555555555555UL; +const unsigned long m2 = 0x3333333333333333UL; +const unsigned long m4 = 0x0f0f0f0f0f0f0f0fUL; +const unsigned long h01 = 0x0101010101010101UL; +const int shift = 56; +#endif + + +int popcount64c(unsigned long x) +{ + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + return (x * h01) >> shift; +} + +/* { dg-final { scan-tree-dump-times "__builtin_popcountl" 1 "optimized" } } */ + + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c new file mode 100644 index 0000000..f3755f1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target popcountll } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +const unsigned long long m1 = 0x5555555555555555ULL; +const unsigned long long m2 = 0x3333333333333333ULL; +const unsigned long long m4 = 0x0F0F0F0F0F0F0F0FULL; +const unsigned long long h01 = 0x0101010101010101ULL; +const int shift = 56; + +int popcount64c(unsigned long long x) +{ + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + return (x * h01) >> shift; +} + +/* { dg-final { scan-tree-dump-times "__builtin_popcountll" 1 "optimized" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 3c50b89..32bbad7 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -6855,6 +6855,29 @@ proc check_effective_target_popcountl { } { } "" ] } +# Return 1 if the target supports popcount on long long. + +proc check_effective_target_popcountll { } { + return [check_no_messages_and_pattern popcountll "!\\(call" rtl-expand { + int foo (long long b) + { + return __builtin_popcountll (b); + } + } "" ] +} + + +# Return 1 if the target supports popcount on int. + +proc check_effective_target_popcount { } { + return [check_no_messages_and_pattern popcount "!\\(call" rtl-expand { + int foo (int b) + { + return __builtin_popcount (b); + } + } "" ] +} + # Return 1 if the target supports atomic operations on "long long" # and can execute them. #