Here is the final version of this patch, implementing Richard's latest suggestion, as committed to mainline. This revision has been tested on x86_64-pc-linux-gnu with a "make bootstrap" and "make -k check" with no new regressions.
Many thanks for the helpful feedback about match.pd's BUILT_IN_ iterators; I'll post a follow-up patch to clean-up/make use of this idiom elsewhere in match.pd, as it is much nicer. Cheers, Roger -- -----Original Message----- From: Richard Biener <richard.guent...@gmail.com> Sent: 23 July 2020 10:02 To: GCC Patches <gcc-patches@gcc.gnu.org> Cc: Roger Sayle <ro...@nextmovesoftware.com> Subject: Re: [PATCH] middle-end: Fold popcount(x&4) to (x>>2)&1 and friends. On Thu, Jul 23, 2020 at 10:15 AM Marc Glisse <marc.gli...@inria.fr> wrote: > > On Wed, 22 Jul 2020, Roger Sayle wrote: > > > Many thanks for the peer review and feedback. I completely agree > > that POPCOUNT and PARITY iterators simplifies things and handle the IFN_ > > variants. > > Is there a reason why the iterators cannot be used for this one? > > +(for popcount (BUILT_IN_POPCOUNT BUILT_IN_POPCOUNTL BUILT_IN_POPCOUNTLL > + BUILT_IN_POPCOUNTIMAX) > + parity (BUILT_IN_PARITY BUILT_IN_PARITYL BUILT_IN_PARITYLL > + BUILT_IN_PARITYIMAX) > + (simplify > + (bit_and (popcount @0) integer_onep) > + (parity @0))) I should indeed work fine to even do (simplify (bit_and (POPCOUNT @0) integer_onep) (PARITY @0)) Likewise for the existing +/* popcount(X) == 0 is X == 0, and related (in)equalities. */ (for +popcount (POPCOUNT) (for cmp (le eq ne gt) rep (eq eq ne ne) (simplify (cmp (popcount @0) integer_zerop) (rep @0 { build_zero_cst (TREE_TYPE (@0)); })))) you can elide the (for ...) OK with those changes. Richard. > > -- > Marc Glisse
diff --git a/gcc/match.pd b/gcc/match.pd index c6ae7a7..a052c9e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5964,25 +5964,51 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (IFN_FMA @0 @1 @2)))) /* POPCOUNT simplifications. */ -(for popcount (BUILT_IN_POPCOUNT BUILT_IN_POPCOUNTL BUILT_IN_POPCOUNTLL - BUILT_IN_POPCOUNTIMAX) - /* popcount(X&1) is nop_expr(X&1). */ - (simplify - (popcount @0) - (if (tree_nonzero_bits (@0) == 1) - (convert @0))) - /* popcount(X) + popcount(Y) is popcount(X|Y) when X&Y must be zero. */ - (simplify - (plus (popcount:s @0) (popcount:s @1)) - (if (wi::bit_and (tree_nonzero_bits (@0), tree_nonzero_bits (@1)) == 0) - (popcount (bit_ior @0 @1)))) - /* popcount(X) == 0 is X == 0, and related (in)equalities. */ +/* popcount(X) + popcount(Y) is popcount(X|Y) when X&Y must be zero. */ +(simplify + (plus (POPCOUNT:s @0) (POPCOUNT:s @1)) + (if (wi::bit_and (tree_nonzero_bits (@0), tree_nonzero_bits (@1)) == 0) + (POPCOUNT (bit_ior @0 @1)))) + +/* popcount(X) == 0 is X == 0, and related (in)equalities. */ +(for popcount (POPCOUNT) (for cmp (le eq ne gt) rep (eq eq ne ne) (simplify (cmp (popcount @0) integer_zerop) (rep @0 { build_zero_cst (TREE_TYPE (@0)); })))) +/* Canonicalize POPCOUNT(x)&1 as PARITY(X). */ +(simplify + (bit_and (POPCOUNT @0) integer_onep) + (PARITY @0)) + +/* PARITY simplifications. */ +/* parity(~X) is parity(X). */ +(simplify + (PARITY (bit_not @0)) + (PARITY @0)) + +/* parity(X)^parity(Y) is parity(X^Y). */ +(simplify + (bit_xor (PARITY:s @0) (PARITY:s @1)) + (PARITY (bit_xor @0 @1))) + +/* Common POPCOUNT/PARITY simplifications. */ +/* popcount(X&C1) is (X>>C2)&1 when C1 == 1<<C2. Same for parity(X&C1). */ +(for pfun (POPCOUNT PARITY) + (simplify + (pfun @0) + (with { wide_int nz = tree_nonzero_bits (@0); } + (switch + (if (nz == 1) + (convert @0)) + (if (wi::popcount (nz) == 1) + (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); } + (convert (rshift:utype (convert:utype @0) + { build_int_cst (integer_type_node, + wi::ctz (nz)); })))))))) + #if GIMPLE /* 64- and 32-bits branchless implementations of popcount are detected: diff --git a/gcc/testsuite/gcc.dg/fold-popcount-5.c b/gcc/testsuite/gcc.dg/fold-popcount-5.c new file mode 100644 index 0000000..943726f --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-popcount-5.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int test_and4(unsigned int a) +{ + return __builtin_popcount(a&4); +} + +int test_and4l(unsigned long b) +{ + return __builtin_popcountl(b&4); +} + +int test_and4ll(unsigned long long c) +{ + return __builtin_popcountll(c&4); +} + +int test_shift(unsigned int d) +{ + int bits = 8*sizeof(unsigned int)-1; + return __builtin_popcount(d<<31); +} + +int test_shiftl(unsigned long e) +{ + int bits = 8*sizeof(unsigned long)-1; + return __builtin_popcountl(e<<bits); +} + +int test_shiftll(unsigned long long f) +{ + int bits = 8*sizeof(unsigned long long)-1; + return __builtin_popcountll(f<<bits); +} + +/* { dg-final { scan-tree-dump-times "popcount" 0 "optimized" } } */ + diff --git a/gcc/testsuite/gcc.dg/fold-parity-1.c b/gcc/testsuite/gcc.dg/fold-parity-1.c new file mode 100644 index 0000000..3ba56c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-1.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-original" } */ + +int foo(unsigned int x) +{ + return __builtin_popcount(x) & 1; +} + +int fool(unsigned long x) +{ + return __builtin_popcountl(x) & 1; +} + +int fooll(unsigned long long x) +{ + return __builtin_popcountll(x) & 1; +} + +/* { dg-final { scan-tree-dump-times "__builtin_popcount" 0 "original" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "original" } } */ + diff --git a/gcc/testsuite/gcc.dg/fold-parity-2.c b/gcc/testsuite/gcc.dg/fold-parity-2.c new file mode 100644 index 0000000..8c7acbf --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-2.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned int x) +{ + return __builtin_parity(~x); +} + +int fool(unsigned long x) +{ + return __builtin_parityl(~x); +} + +int fooll(unsigned long long x) +{ + return __builtin_parityll(~x); +} + +/* { dg-final { scan-tree-dump-times "~" 0 "optimized" } } */ + diff --git a/gcc/testsuite/gcc.dg/fold-parity-3.c b/gcc/testsuite/gcc.dg/fold-parity-3.c new file mode 100644 index 0000000..e0355cc --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned int x) +{ + return __builtin_parity(x&1); +} + +int fool(unsigned long x) +{ + return __builtin_parityl(x&1); +} + +int fooll(unsigned long long x) +{ + return __builtin_parityll(x&1); +} + +/* { dg-final { scan-tree-dump-times "__builtin_parity" 0 "optimized" } } */ + diff --git a/gcc/testsuite/gcc.dg/fold-parity-4.c b/gcc/testsuite/gcc.dg/fold-parity-4.c new file mode 100644 index 0000000..5dfedab --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-4.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned int x, unsigned int y) +{ + return __builtin_parity(x) ^ __builtin_parity(y); +} + +int fool(unsigned long x, unsigned long y) +{ + return __builtin_parityl(x) ^ __builtin_parityl(y); +} + +int fooll(unsigned long long x, unsigned long long y) +{ + return __builtin_parityll(x) ^ __builtin_parityll(y); +} + +/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "optimized" } } */ + diff --git a/gcc/testsuite/gcc.dg/fold-parity-5.c b/gcc/testsuite/gcc.dg/fold-parity-5.c new file mode 100644 index 0000000..69d3a6a --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-5.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int test_and4(unsigned int a) +{ + return __builtin_parity(a&4); +} + +int test_and4l(unsigned long b) +{ + return __builtin_parityl(b&4); +} + +int test_and4ll(unsigned long long c) +{ + return __builtin_parityll(c&4); +} + +int test_shift(unsigned int d) +{ + int bits = 8*sizeof(unsigned int)-1; + return __builtin_parity(d<<31); +} + +int test_shiftl(unsigned long e) +{ + int bits = 8*sizeof(unsigned long)-1; + return __builtin_parityl(e<<bits); +} + +int test_shiftll(unsigned long long f) +{ + int bits = 8*sizeof(unsigned long long)-1; + return __builtin_parityll(f<<bits); +} + +/* { dg-final { scan-tree-dump-times "parity" 0 "optimized" } } */ +
patcha4.log
Description: Binary data