On Thu, 12 Oct 2017, Jakub Jelinek wrote: > Hi! > > Marc in the PR mentioned that it is not really good that the recommended > rotate pattern is recognized only during forwprop1 and later, which is after > einline and that inlining or early opts could have changed stuff too much so > that we wouldn't recogize it anymore.
Hmm, but the only thing functions see is inlining early optimized bodies into them and then constant propagation performed, so I don't see how we could confuse the pattern in a way to be indetectable. Also early inlining is performed on early optimized bodies so cost metrics see rotates, not the unrecognized form. > The following patch handles that pattern in fold_binary_loc too, and while > I've touched it, it cleans a lot of ugliness/duplication in that code. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Still looks like an improvement, thus ok. Thanks, Richard. > 2017-10-12 Jakub Jelinek <ja...@redhat.com> > > PR target/82498 > * fold-const.c (fold_binary_loc) <bit_rotate>: Code cleanups, > instead of handling MINUS_EXPR twice (once for each argument), > canonicalize operand order and handle just once, use rtype where > possible. Handle (A << B) | (A >> (-B & (Z - 1))). > > * gcc.dg/tree-ssa/pr82498.c: New test. > > --- gcc/fold-const.c.jj 2017-10-11 22:37:51.000000000 +0200 > +++ gcc/fold-const.c 2017-10-12 13:17:45.360589554 +0200 > @@ -9429,7 +9429,10 @@ fold_binary_loc (location_t loc, > /* (A << C1) + (A >> C2) if A is unsigned and C1+C2 is the size of A > is a rotate of A by C1 bits. */ > /* (A << B) + (A >> (Z - B)) if A is unsigned and Z is the size of A > - is a rotate of A by B bits. */ > + is a rotate of A by B bits. > + Similarly for (A << B) | (A >> (-B & C3)) where C3 is Z-1, > + though in this case CODE must be | and not + or ^, otherwise > + it doesn't return A when B is 0. */ > { > enum tree_code code0, code1; > tree rtype; > @@ -9447,25 +9450,32 @@ fold_binary_loc (location_t loc, > == GET_MODE_UNIT_PRECISION (TYPE_MODE (rtype)))) > { > tree tree01, tree11; > + tree orig_tree01, orig_tree11; > enum tree_code code01, code11; > > - tree01 = TREE_OPERAND (arg0, 1); > - tree11 = TREE_OPERAND (arg1, 1); > + tree01 = orig_tree01 = TREE_OPERAND (arg0, 1); > + tree11 = orig_tree11 = TREE_OPERAND (arg1, 1); > STRIP_NOPS (tree01); > STRIP_NOPS (tree11); > code01 = TREE_CODE (tree01); > code11 = TREE_CODE (tree11); > + if (code11 != MINUS_EXPR > + && (code01 == MINUS_EXPR || code01 == BIT_AND_EXPR)) > + { > + std::swap (code0, code1); > + std::swap (code01, code11); > + std::swap (tree01, tree11); > + std::swap (orig_tree01, orig_tree11); > + } > if (code01 == INTEGER_CST > && code11 == INTEGER_CST > && (wi::to_widest (tree01) + wi::to_widest (tree11) > - == element_precision (TREE_TYPE (TREE_OPERAND (arg0, 0))))) > + == element_precision (rtype))) > { > tem = build2_loc (loc, LROTATE_EXPR, > - TREE_TYPE (TREE_OPERAND (arg0, 0)), > - TREE_OPERAND (arg0, 0), > + rtype, TREE_OPERAND (arg0, 0), > code0 == LSHIFT_EXPR > - ? TREE_OPERAND (arg0, 1) > - : TREE_OPERAND (arg1, 1)); > + ? orig_tree01 : orig_tree11); > return fold_convert_loc (loc, type, tem); > } > else if (code11 == MINUS_EXPR) > @@ -9477,39 +9487,37 @@ fold_binary_loc (location_t loc, > STRIP_NOPS (tree111); > if (TREE_CODE (tree110) == INTEGER_CST > && 0 == compare_tree_int (tree110, > - element_precision > - (TREE_TYPE (TREE_OPERAND > - (arg0, 0)))) > + element_precision (rtype)) > && operand_equal_p (tree01, tree111, 0)) > - return > - fold_convert_loc (loc, type, > - build2 ((code0 == LSHIFT_EXPR > - ? LROTATE_EXPR > - : RROTATE_EXPR), > - TREE_TYPE (TREE_OPERAND (arg0, > 0)), > - TREE_OPERAND (arg0, 0), > - TREE_OPERAND (arg0, 1))); > + { > + tem = build2_loc (loc, (code0 == LSHIFT_EXPR > + ? LROTATE_EXPR : RROTATE_EXPR), > + rtype, TREE_OPERAND (arg0, 0), > + orig_tree01); > + return fold_convert_loc (loc, type, tem); > + } > } > - else if (code01 == MINUS_EXPR) > + else if (code == BIT_IOR_EXPR > + && code11 == BIT_AND_EXPR > + && pow2p_hwi (element_precision (rtype))) > { > - tree tree010, tree011; > - tree010 = TREE_OPERAND (tree01, 0); > - tree011 = TREE_OPERAND (tree01, 1); > - STRIP_NOPS (tree010); > - STRIP_NOPS (tree011); > - if (TREE_CODE (tree010) == INTEGER_CST > - && 0 == compare_tree_int (tree010, > - element_precision > - (TREE_TYPE (TREE_OPERAND > - (arg0, 0)))) > - && operand_equal_p (tree11, tree011, 0)) > - return fold_convert_loc > - (loc, type, > - build2 ((code0 != LSHIFT_EXPR > - ? LROTATE_EXPR > - : RROTATE_EXPR), > - TREE_TYPE (TREE_OPERAND (arg0, 0)), > - TREE_OPERAND (arg0, 0), TREE_OPERAND (arg1, 1))); > + tree tree110, tree111; > + tree110 = TREE_OPERAND (tree11, 0); > + tree111 = TREE_OPERAND (tree11, 1); > + STRIP_NOPS (tree110); > + STRIP_NOPS (tree111); > + if (TREE_CODE (tree110) == NEGATE_EXPR > + && TREE_CODE (tree111) == INTEGER_CST > + && 0 == compare_tree_int (tree111, > + element_precision (rtype) - 1) > + && operand_equal_p (tree01, TREE_OPERAND (tree110, 0), 0)) > + { > + tem = build2_loc (loc, (code0 == LSHIFT_EXPR > + ? LROTATE_EXPR : RROTATE_EXPR), > + rtype, TREE_OPERAND (arg0, 0), > + orig_tree01); > + return fold_convert_loc (loc, type, tem); > + } > } > } > } > --- gcc/testsuite/gcc.dg/tree-ssa/pr82498.c.jj 2017-10-12 > 13:14:27.234991970 +0200 > +++ gcc/testsuite/gcc.dg/tree-ssa/pr82498.c 2017-10-12 13:13:45.000000000 > +0200 > @@ -0,0 +1,53 @@ > +/* PR target/82498 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-original" } */ > +/* { dg-final { scan-tree-dump-times "x r<< y" 4 "original" { target int32 } > } } */ > +/* { dg-final { scan-tree-dump-times "x r>> y" 4 "original" { target int32 } > } } */ > + > +unsigned > +f1 (unsigned x, int y) > +{ > + return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y)); > +} > + > +unsigned > +f2 (unsigned x, int y) > +{ > + return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); > +} > + > +unsigned > +f3 (unsigned x, int y) > +{ > + return (x >> y) | (x << (__CHAR_BIT__ * __SIZEOF_INT__ - y)); > +} > + > +unsigned > +f4 (unsigned x, int y) > +{ > + return (x >> y) | (x << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); > +} > + > +unsigned > +f5 (unsigned x, int y) > +{ > + return (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y)) | (x << y); > +} > + > +unsigned > +f6 (unsigned x, int y) > +{ > + return (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x << y); > +} > + > +unsigned > +f7 (unsigned x, int y) > +{ > + return (x << (__CHAR_BIT__ * __SIZEOF_INT__ - y)) | (x >> y); > +} > + > +unsigned > +f8 (unsigned x, int y) > +{ > + return (x << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> y); > +} > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)