The emergency reassociation patch for PR52976 disabled un-distribution in the presence of repeated factors to avoid ICEs in zero_one_operation. This patch fixes such cases properly by teaching zero_one_operation about __builtin_pow* calls.
Bootstrapped with no new regressions on powerpc64-linux. Also built SPEC cpu2000 and cpu2006 successfully. Ok for trunk? Thanks, Bill gcc: 2012-04-17 Bill Schmidt <wschm...@linux.vnet.ibm.com> * tree-ssa-reassoc.c (stmt_is_power_of_op): New function. (decrement_power): Likewise. (propagate_op_to_single_use): Likewise. (zero_one_operation): Handle __builtin_pow* calls in linearized expression trees; factor logic into propagate_op_to_single_use. (undistribute_ops_list): Allow operands with repeat counts > 1. gcc/testsuite: 2012-04-17 Bill Schmidt <wschm...@linux.vnet.ibm.com> gfortran.dg/reassoc_7.f: New test. gfortran.dg/reassoc_8.f: Likewise. gfortran.dg/reassoc_9.f: Likewise. gfortran.dg/reassoc_10.f: Likewise. Index: gcc/testsuite/gfortran.dg/reassoc_10.f =================================================================== --- gcc/testsuite/gfortran.dg/reassoc_10.f (revision 0) +++ gcc/testsuite/gfortran.dg/reassoc_10.f (revision 0) @@ -0,0 +1,17 @@ +! { dg-do compile } +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } + + SUBROUTINE S55199(P,Q,Dvdph) + implicit none + real(8) :: c1,c2,c3,P,Q,Dvdph + c1=0.1d0 + c2=0.2d0 + c3=0.3d0 + Dvdph = c1 + 2.*P*c2 + 3.*P**2*Q**3*c3 + END + +! There should be five multiplies following un-distribution +! and power expansion. + +! { dg-final { scan-tree-dump-times " \\\* " 5 "optimized" } } +! { dg-final { cleanup-tree-dump "optimized" } } Index: gcc/testsuite/gfortran.dg/reassoc_7.f =================================================================== --- gcc/testsuite/gfortran.dg/reassoc_7.f (revision 0) +++ gcc/testsuite/gfortran.dg/reassoc_7.f (revision 0) @@ -0,0 +1,16 @@ +! { dg-do compile } +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } + + SUBROUTINE S55199(P,Dvdph) + implicit none + real(8) :: c1,c2,c3,P,Dvdph + c1=0.1d0 + c2=0.2d0 + c3=0.3d0 + Dvdph = c1 + 2.*P*c2 + 3.*P**2*c3 + END + +! There should be two multiplies following un-distribution. + +! { dg-final { scan-tree-dump-times " \\\* " 2 "optimized" } } +! { dg-final { cleanup-tree-dump "optimized" } } Index: gcc/testsuite/gfortran.dg/reassoc_8.f =================================================================== --- gcc/testsuite/gfortran.dg/reassoc_8.f (revision 0) +++ gcc/testsuite/gfortran.dg/reassoc_8.f (revision 0) @@ -0,0 +1,17 @@ +! { dg-do compile } +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } + + SUBROUTINE S55199(P,Dvdph) + implicit none + real(8) :: c1,c2,c3,P,Dvdph + c1=0.1d0 + c2=0.2d0 + c3=0.3d0 + Dvdph = c1 + 2.*P**2*c2 + 3.*P**3*c3 + END + +! There should be three multiplies following un-distribution +! and power expansion. + +! { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } +! { dg-final { cleanup-tree-dump "optimized" } } Index: gcc/testsuite/gfortran.dg/reassoc_9.f =================================================================== --- gcc/testsuite/gfortran.dg/reassoc_9.f (revision 0) +++ gcc/testsuite/gfortran.dg/reassoc_9.f (revision 0) @@ -0,0 +1,17 @@ +! { dg-do compile } +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } + + SUBROUTINE S55199(P,Dvdph) + implicit none + real(8) :: c1,c2,c3,P,Dvdph + c1=0.1d0 + c2=0.2d0 + c3=0.3d0 + Dvdph = c1 + 2.*P**2*c2 + 3.*P**4*c3 + END + +! There should be three multiplies following un-distribution +! and power expansion. + +! { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } +! { dg-final { cleanup-tree-dump "optimized" } } Index: gcc/tree-ssa-reassoc.c =================================================================== --- gcc/tree-ssa-reassoc.c (revision 186495) +++ gcc/tree-ssa-reassoc.c (working copy) @@ -1020,6 +1020,98 @@ oecount_cmp (const void *p1, const void *p2) return c1->id - c2->id; } +/* Return TRUE iff STMT represents a builtin call that raises OP + to some exponent. */ + +static bool +stmt_is_power_of_op (gimple stmt, tree op) +{ + tree fndecl; + + if (!is_gimple_call (stmt)) + return false; + + fndecl = gimple_call_fndecl (stmt); + + if (!fndecl + || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) + return false; + + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) + { + CASE_FLT_FN (BUILT_IN_POW): + CASE_FLT_FN (BUILT_IN_POWI): + return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); + + default: + return false; + } +} + +/* Given STMT which is a __builtin_pow* call, decrement its exponent + in place and return the result. Assumes that stmt_is_power_of_op + was previously called for STMT and returned TRUE. */ + +static HOST_WIDE_INT +decrement_power (gimple stmt) +{ + REAL_VALUE_TYPE c, cint; + HOST_WIDE_INT power; + tree arg1; + + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) + { + CASE_FLT_FN (BUILT_IN_POW): + arg1 = gimple_call_arg (stmt, 1); + c = TREE_REAL_CST (arg1); + power = real_to_integer (&c) - 1; + real_from_integer (&cint, VOIDmode, power, 0, 0); + gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint)); + return power; + + CASE_FLT_FN (BUILT_IN_POWI): + arg1 = gimple_call_arg (stmt, 1); + power = TREE_INT_CST_LOW (arg1) - 1; + gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power)); + return power; + + default: + gcc_unreachable (); + } +} + +/* Find the single immediate use of STMT's LHS, and replace it + with OP. Remove STMT. If STMT's LHS is the same as *DEF, + replace *DEF with OP as well. */ + +static void +propagate_op_to_single_use (tree op, gimple stmt, tree *def) +{ + tree lhs; + gimple use_stmt; + use_operand_p use; + gimple_stmt_iterator gsi; + + if (is_gimple_call (stmt)) + lhs = gimple_call_lhs (stmt); + else + lhs = gimple_assign_lhs (stmt); + + gcc_assert (has_single_use (lhs)); + single_imm_use (lhs, &use, &use_stmt); + if (lhs == *def) + *def = op; + SET_USE (use, op); + if (TREE_CODE (op) != SSA_NAME) + update_stmt (use_stmt); + gsi = gsi_for_stmt (stmt); + gsi_remove (&gsi, true); + release_defs (stmt); + + if (is_gimple_call (stmt)) + unlink_stmt_vdef (stmt); +} + /* Walks the linear chain with result *DEF searching for an operation with operand OP and code OPCODE removing that from the chain. *DEF is updated if there is only one operand but no operation left. */ @@ -1031,8 +1123,18 @@ zero_one_operation (tree *def, enum tree_code opco do { - tree name = gimple_assign_rhs1 (stmt); + tree name; + if (opcode == MULT_EXPR + && stmt_is_power_of_op (stmt, op)) + { + if (decrement_power (stmt) == 1) + propagate_op_to_single_use (op, stmt, def); + return; + } + + name = gimple_assign_rhs1 (stmt); + /* If this is the operation we look for and one of the operands is ours simply propagate the other operand into the stmts single use. */ @@ -1040,24 +1142,27 @@ zero_one_operation (tree *def, enum tree_code opco && (name == op || gimple_assign_rhs2 (stmt) == op)) { - gimple use_stmt; - use_operand_p use; - gimple_stmt_iterator gsi; if (name == op) name = gimple_assign_rhs2 (stmt); - gcc_assert (has_single_use (gimple_assign_lhs (stmt))); - single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt); - if (gimple_assign_lhs (stmt) == *def) - *def = name; - SET_USE (use, name); - if (TREE_CODE (name) != SSA_NAME) - update_stmt (use_stmt); - gsi = gsi_for_stmt (stmt); - gsi_remove (&gsi, true); - release_defs (stmt); + propagate_op_to_single_use (name, stmt, def); return; } + /* We might have a multiply of two __builtin_pow* calls, and + the operand might be hiding in the rightmost one. */ + if (opcode == MULT_EXPR + && gimple_assign_rhs_code (stmt) == opcode + && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) + { + gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); + if (stmt_is_power_of_op (stmt2, op)) + { + if (decrement_power (stmt2) == 1) + propagate_op_to_single_use (op, stmt2, def); + return; + } + } + /* Continue walking the chain. */ gcc_assert (name != op && TREE_CODE (name) == SSA_NAME); @@ -1222,7 +1327,6 @@ undistribute_ops_list (enum tree_code opcode, dcode = gimple_assign_rhs_code (oe1def); if ((dcode != MULT_EXPR && dcode != RDIV_EXPR) - || oe1->count != 1 || !is_reassociable_op (oe1def, dcode, loop)) continue; @@ -1266,8 +1370,6 @@ undistribute_ops_list (enum tree_code opcode, oecount c; void **slot; size_t idx; - if (oe1->count != 1) - continue; c.oecode = oecode; c.cnt = 1; c.id = next_oecount_id++; @@ -1336,7 +1438,7 @@ undistribute_ops_list (enum tree_code opcode, FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1) { - if (oe1->op == c->op && oe1->count == 1) + if (oe1->op == c->op) { SET_BIT (candidates2, i); ++nr_candidates2;