Hi Marc, Thanks for the feedback. After some quality time in gdb, I now appreciate that match.pd behaves (subtly) differently between generic and gimple, and the trees actually being passed to tree_nonzero_bits were not quite what I had expected. Sorry for my confusion, the revised patch below is now much shorter (and my follow-up patch that was originally to tree_nonzero_bits looks like it now needs to be to get_nonzero_bits!).
This revised patch has been retested on 864_64-pc-linux-gnu with a "make bootstrap" and "make -k check" with no new failures. Ok for mainline? 2021-07-28 Roger Sayle <ro...@nextmovesoftware.com> Marc Glisse <marc.gli...@inria.fr> gcc/ChangeLog * match.pd (bit_ior, bit_xor): Canonicalize (X*C1)|(X*C2) and (X*C1)^(X*C2) as X*(C1+C2), and related variants, using tree_nonzero_bits to ensure that operands are bit-wise disjoint. gcc/testsuite/ChangeLog * gcc.dg/fold-ior-4.c: New test. Roger -- -----Original Message----- From: Marc Glisse <marc.gli...@inria.fr> Sent: 26 July 2021 16:45 To: Roger Sayle <ro...@nextmovesoftware.com> Cc: 'GCC Patches' <gcc-patches@gcc.gnu.org> Subject: Re: [PATCH] Fold (X<<C1)^(X<<C2) to a multiplication when possible. On Mon, 26 Jul 2021, Roger Sayle wrote: > The one aspect that's a little odd is that each transform is paired > with a convert@1 variant, using the efficient match machinery to > expose any zero extension to fold-const.c's tree_nonzero_bits functionality. Copying the first transform for context +(for op (bit_ior bit_xor) + (simplify + (op (mult:s@0 @1 INTEGER_CST@2) + (mult:s@3 @1 INTEGER_CST@4)) + (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type) + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0) + (mult @1 + { wide_int_to_tree (type, wi::to_wide (@2) + wi::to_wide (@4)); }))) +(simplify + (op (mult:s@0 (convert@1 @2) INTEGER_CST@3) + (mult:s@4 (convert@1 @2) INTEGER_CST@5)) + (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type) + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@4)) == 0) + (mult @1 + { wide_int_to_tree (type, wi::to_wide (@3) + wi::to_wide (@5)); }))) Could you explain how the convert helps exactly? -- Marc Glisse
diff --git a/gcc/match.pd b/gcc/match.pd index beb8d27..5bc6851 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2833,6 +2833,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (convert (mult (convert:t @0) { cst; }))))) #endif +/* Canonicalize (X*C1)|(X*C2) and (X*C1)^(X*C2) to (C1+C2)*X when + tree_nonzero_bits allows IOR and XOR to be treated like PLUS. + Likewise, handle (X<<C3) and X as legitimate variants of X*C. */ +(for op (bit_ior bit_xor) + (simplify + (op (mult:s@0 @1 INTEGER_CST@2) + (mult:s@3 @1 INTEGER_CST@4)) + (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type) + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0) + (mult @1 + { wide_int_to_tree (type, wi::to_wide (@2) + wi::to_wide (@4)); }))) + (simplify + (op:c (mult:s@0 @1 INTEGER_CST@2) + (lshift:s@3 @1 INTEGER_CST@4)) + (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type) + && tree_int_cst_sgn (@4) > 0 + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0) + (with { wide_int wone = wi::one (TYPE_PRECISION (type)); + wide_int c = wi::add (wi::to_wide (@2), + wi::lshift (wone, wi::to_wide (@4))); } + (mult @1 { wide_int_to_tree (type, c); })))) + (simplify + (op:c (mult:s@0 @1 INTEGER_CST@2) + @1) + (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type) + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@1)) == 0) + (mult @1 + { wide_int_to_tree (type, + wi::add (wi::to_wide (@2), 1)); }))) + (simplify + (op (lshift:s@0 @1 INTEGER_CST@2) + (lshift:s@3 @1 INTEGER_CST@4)) + (if (INTEGRAL_TYPE_P (type) + && tree_int_cst_sgn (@2) > 0 + && tree_int_cst_sgn (@4) > 0 + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0) + (with { tree t = type; + if (!TYPE_OVERFLOW_WRAPS (t)) + t = unsigned_type_for (t); + wide_int wone = wi::one (TYPE_PRECISION (t)); + wide_int c = wi::add (wi::lshift (wone, wi::to_wide (@2)), + wi::lshift (wone, wi::to_wide (@4))); } + (convert (mult:t (convert:t @1) { wide_int_to_tree (t,c); }))))) + (simplify + (op:c (lshift:s@0 @1 INTEGER_CST@2) + @1) + (if (INTEGRAL_TYPE_P (type) + && tree_int_cst_sgn (@2) > 0 + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@1)) == 0) + (with { tree t = type; + if (!TYPE_OVERFLOW_WRAPS (t)) + t = unsigned_type_for (t); + wide_int wone = wi::one (TYPE_PRECISION (t)); + wide_int c = wi::add (wi::lshift (wone, wi::to_wide (@2)), wone); } + (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); })))))) + /* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */ (for minmax (min max FMIN_ALL FMAX_ALL)
/* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ unsigned int test_ior(unsigned char i) { return i | (i<<8) | (i<<16) | (i<<24); } unsigned int test_xor(unsigned char i) { return i ^ (i<<8) ^ (i<<16) ^ (i<<24); } unsigned int test_ior_1s(unsigned char i) { return i | (i<<8); } unsigned int test_ior_1u(unsigned char i) { unsigned int t = i; return t | (t<<8); } unsigned int test_xor_1s(unsigned char i) { return i ^ (i<<8); } unsigned int test_xor_1u(unsigned char i) { unsigned int t = i; return t ^ (t<<8); } unsigned int test_ior_2s(unsigned char i) { return (i<<8) | (i<<16); } unsigned int test_ior_2u(unsigned char i) { unsigned int t = i; return (t<<8) | (t<<16); } unsigned int test_xor_2s(unsigned char i) { return (i<<8) ^ (i<<16); } unsigned int test_xor_2u(unsigned char i) { unsigned int t = i; return (t<<8) ^ (t<<16); } /* { dg-final { scan-tree-dump-not " \\^ " "optimized" } } */ /* { dg-final { scan-tree-dump-not " \\| " "optimized" } } */ /* { dg-final { scan-tree-dump-times " \\* 16843009" 2 "optimized" } } */