[PATCH] forwprop, v2: Canonicalize atomic fetch_op op x to op_fetch or vice versa [PR98737]
On Thu, Jan 13, 2022 at 04:07:20PM +0100, Richard Biener wrote:
> I'm mostly concerned about the replace_uses_by use.  forwprop
> will go over newly emitted stmts and thus, for the hypothetical added
>
>   lhs2 = d;
>
> record the copy and schedule the stmt for removal, substituting 'd'
> in each use as it goes along the function and folding them.  It's
> a bit iffy (and maybe has unintended side-effects in odd cases)
> to trample around and fold stuff behind that flows back.
>
> I'd always vote to simplify the folding code so it's easier to
> maintain and not micro-optimize there since it's not going to be
> a hot part of the compiler.

Ok.  So like this?

2022-01-13  Jakub Jelinek

	PR target/98737
	* tree-ssa-forwprop.c (simplify_builtin_call): Canonicalize
	__atomic_fetch_op (p, x, y) op x into __atomic_op_fetch (p, x, y)
	and __atomic_op_fetch (p, x, y) iop x into
	__atomic_fetch_op (p, x, y).

	* gcc.dg/tree-ssa/pr98737-1.c: New test.
	* gcc.dg/tree-ssa/pr98737-2.c: New test.

--- gcc/tree-ssa-forwprop.c.jj	2022-01-11 23:11:23.467275019 +0100
+++ gcc/tree-ssa-forwprop.c	2022-01-13 18:09:50.318625915 +0100
@@ -1241,12 +1241,19 @@ constant_pointer_difference (tree p1, tr
      memset (p + 4, ' ', 3);
    into
      memcpy (p, "abcd   ", 7);
-   call if the latter can be stored by pieces during expansion.  */
+   call if the latter can be stored by pieces during expansion.
+
+   Also canonicalize __atomic_fetch_op (p, x, y) op x
+   to __atomic_op_fetch (p, x, y) or
+   __atomic_op_fetch (p, x, y) iop x
+   to __atomic_fetch_op (p, x, y) when possible (also __sync).  */
 
 static bool
 simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
 {
   gimple *stmt1, *stmt2 = gsi_stmt (*gsi_p);
+  enum built_in_function other_atomic = END_BUILTINS;
+  enum tree_code atomic_op = ERROR_MARK;
   tree vuse = gimple_vuse (stmt2);
   if (vuse == NULL)
     return false;
@@ -1448,6 +1455,290 @@ simplify_builtin_call (gimple_stmt_itera
 	    }
 	}
       break;
+
+#define CASE_ATOMIC(NAME, OTHER, OP) \
+    case BUILT_IN_##NAME##_1:						\
+    case BUILT_IN_##NAME##_2:						\
+    case BUILT_IN_##NAME##_4:						\
+    case BUILT_IN_##NAME##_8:						\
+    case BUILT_IN_##NAME##_16:						\
+      atomic_op = OP;							\
+      other_atomic							\
+	= (enum built_in_function) (BUILT_IN_##OTHER##_1		\
+				    + (DECL_FUNCTION_CODE (callee2)	\
+				       - BUILT_IN_##NAME##_1));		\
+      goto handle_atomic_fetch_op;
+
+    CASE_ATOMIC (ATOMIC_FETCH_ADD, ATOMIC_ADD_FETCH, PLUS_EXPR)
+    CASE_ATOMIC (ATOMIC_FETCH_SUB, ATOMIC_SUB_FETCH, MINUS_EXPR)
+    CASE_ATOMIC (ATOMIC_FETCH_AND, ATOMIC_AND_FETCH, BIT_AND_EXPR)
+    CASE_ATOMIC (ATOMIC_FETCH_XOR, ATOMIC_XOR_FETCH, BIT_XOR_EXPR)
+    CASE_ATOMIC (ATOMIC_FETCH_OR, ATOMIC_OR_FETCH, BIT_IOR_EXPR)
+
+    CASE_ATOMIC (SYNC_FETCH_AND_ADD, SYNC_ADD_AND_FETCH, PLUS_EXPR)
+    CASE_ATOMIC (SYNC_FETCH_AND_SUB, SYNC_SUB_AND_FETCH, MINUS_EXPR)
+    CASE_ATOMIC (SYNC_FETCH_AND_AND, SYNC_AND_AND_FETCH, BIT_AND_EXPR)
+    CASE_ATOMIC (SYNC_FETCH_AND_XOR, SYNC_XOR_AND_FETCH, BIT_XOR_EXPR)
+    CASE_ATOMIC (SYNC_FETCH_AND_OR, SYNC_OR_AND_FETCH, BIT_IOR_EXPR)
+
+    CASE_ATOMIC (ATOMIC_ADD_FETCH, ATOMIC_FETCH_ADD, MINUS_EXPR)
+    CASE_ATOMIC (ATOMIC_SUB_FETCH, ATOMIC_FETCH_SUB, PLUS_EXPR)
+    CASE_ATOMIC (ATOMIC_XOR_FETCH, ATOMIC_FETCH_XOR, BIT_XOR_EXPR)
+
+    CASE_ATOMIC (SYNC_ADD_AND_FETCH, SYNC_FETCH_AND_ADD, MINUS_EXPR)
+    CASE_ATOMIC (SYNC_SUB_AND_FETCH, SYNC_FETCH_AND_SUB, PLUS_EXPR)
+    CASE_ATOMIC (SYNC_XOR_AND_FETCH, SYNC_FETCH_AND_XOR, BIT_XOR_EXPR)
+
+#undef CASE_ATOMIC
+
+    handle_atomic_fetch_op:
+      if (gimple_call_num_args (stmt2) >= 2 && gimple_call_lhs (stmt2))
+	{
+	  tree lhs2 = gimple_call_lhs (stmt2), lhsc = lhs2;
+	  tree arg = gimple_call_arg (stmt2, 1);
+	  gimple *use_stmt, *cast_stmt = NULL;
+	  use_operand_p use_p;
+	  tree ndecl = builtin_decl_explicit (other_atomic);
+
+	  if (ndecl == NULL_TREE || !single_imm_use (lhs2, &use_p, &use_stmt))
+	    break;
+
+	  if (gimple_assign_cast_p (use_stmt))
+	    {
+	      cast_stmt = use_stmt;
+	      lhsc = gimple_assign_lhs (cast_stmt);
+	      if (lhsc == NULL_TREE
+		  || !INTEGRAL_TYPE_P (TREE_TYPE (lhsc))
+		  || (TYPE_PRECISION (TREE_TYPE (lhsc))
+		      != TYPE_PRECISION (TREE_TYPE (lhs2)))
+		  || !single_imm_use (lhsc, &use_p, &use_stmt))
+		{
+		  use_stmt = cast_stmt;
+		  cast_stmt = NULL;
+		  lhsc = lhs2;
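
To illustrate what the canonicalization means at the source level, here
is a minimal sketch with hypothetical function names (not taken verbatim
from the new pr98737-* tests); both directions are handled when the
call's result has no other use:

  long
  f1 (long *p, long x)
  {
    /* __atomic_fetch_add returns the old value; adding x again yields
       the new value, so this becomes __atomic_add_fetch (p, x, ...).  */
    return __atomic_fetch_add (p, x, __ATOMIC_SEQ_CST) + x;
  }

  long
  f2 (long *p, long x)
  {
    /* Conversely, applying the inverse op to the result recovers the
       old value, so this becomes __atomic_fetch_add (p, x, ...).  */
    return __atomic_add_fetch (p, x, __ATOMIC_SEQ_CST) - x;
  }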
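For readability, the first CASE_ATOMIC use written out by hand: the
index arithmetic relies on the _1/_2/_4/_8/_16 variants of each family
being consecutive values in enum built_in_function, so the offset of
callee2's code from the family's _1 member selects the same size
variant of the counterpart builtin:

    case BUILT_IN_ATOMIC_FETCH_ADD_1:
    case BUILT_IN_ATOMIC_FETCH_ADD_2:
    case BUILT_IN_ATOMIC_FETCH_ADD_4:
    case BUILT_IN_ATOMIC_FETCH_ADD_8:
    case BUILT_IN_ATOMIC_FETCH_ADD_16:
      atomic_op = PLUS_EXPR;
      /* E.g. for __atomic_fetch_add_4, callee2's code is two past
	 BUILT_IN_ATOMIC_FETCH_ADD_1, so this yields
	 BUILT_IN_ATOMIC_ADD_FETCH_4.  */
      other_atomic
	= (enum built_in_function) (BUILT_IN_ATOMIC_ADD_FETCH_1
				    + (DECL_FUNCTION_CODE (callee2)
				       - BUILT_IN_ATOMIC_FETCH_ADD_1));
      goto handle_atomic_fetch_op;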