On Tue, 17 Apr 2012, William J. Schmidt wrote: > The emergency reassociation patch for PR52976 disabled un-distribution > in the presence of repeated factors to avoid ICEs in zero_one_operation. > This patch fixes such cases properly by teaching zero_one_operation > about __builtin_pow* calls. > > Bootstrapped with no new regressions on powerpc64-linux. Also built > SPEC cpu2000 and cpu2006 successfully. Ok for trunk?
Ok. Thanks, Richard > Thanks, > Bill > > > gcc: > > 2012-04-17 Bill Schmidt <wschm...@linux.vnet.ibm.com> > > * tree-ssa-reassoc.c (stmt_is_power_of_op): New function. > (decrement_power): Likewise. > (propagate_op_to_single_use): Likewise. > (zero_one_operation): Handle __builtin_pow* calls in linearized > expression trees; factor logic into propagate_op_to_single_use. > (undistribute_ops_list): Allow operands with repeat counts > 1. > > > gcc/testsuite: > > 2012-04-17 Bill Schmidt <wschm...@linux.vnet.ibm.com> > > gfortran.dg/reassoc_7.f: New test. > gfortran.dg/reassoc_8.f: Likewise. > gfortran.dg/reassoc_9.f: Likewise. > gfortran.dg/reassoc_10.f: Likewise. > > > Index: gcc/testsuite/gfortran.dg/reassoc_10.f > =================================================================== > --- gcc/testsuite/gfortran.dg/reassoc_10.f (revision 0) > +++ gcc/testsuite/gfortran.dg/reassoc_10.f (revision 0) > @@ -0,0 +1,17 @@ > +! { dg-do compile } > +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } > + > + SUBROUTINE S55199(P,Q,Dvdph) > + implicit none > + real(8) :: c1,c2,c3,P,Q,Dvdph > + c1=0.1d0 > + c2=0.2d0 > + c3=0.3d0 > + Dvdph = c1 + 2.*P*c2 + 3.*P**2*Q**3*c3 > + END > + > +! There should be five multiplies following un-distribution > +! and power expansion. > + > +! { dg-final { scan-tree-dump-times " \\\* " 5 "optimized" } } > +! { dg-final { cleanup-tree-dump "optimized" } } > Index: gcc/testsuite/gfortran.dg/reassoc_7.f > =================================================================== > --- gcc/testsuite/gfortran.dg/reassoc_7.f (revision 0) > +++ gcc/testsuite/gfortran.dg/reassoc_7.f (revision 0) > @@ -0,0 +1,16 @@ > +! { dg-do compile } > +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } > + > + SUBROUTINE S55199(P,Dvdph) > + implicit none > + real(8) :: c1,c2,c3,P,Dvdph > + c1=0.1d0 > + c2=0.2d0 > + c3=0.3d0 > + Dvdph = c1 + 2.*P*c2 + 3.*P**2*c3 > + END > + > +! There should be two multiplies following un-distribution. > + > +! { dg-final { scan-tree-dump-times " \\\* " 2 "optimized" } } > +! { dg-final { cleanup-tree-dump "optimized" } } > Index: gcc/testsuite/gfortran.dg/reassoc_8.f > =================================================================== > --- gcc/testsuite/gfortran.dg/reassoc_8.f (revision 0) > +++ gcc/testsuite/gfortran.dg/reassoc_8.f (revision 0) > @@ -0,0 +1,17 @@ > +! { dg-do compile } > +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } > + > + SUBROUTINE S55199(P,Dvdph) > + implicit none > + real(8) :: c1,c2,c3,P,Dvdph > + c1=0.1d0 > + c2=0.2d0 > + c3=0.3d0 > + Dvdph = c1 + 2.*P**2*c2 + 3.*P**3*c3 > + END > + > +! There should be three multiplies following un-distribution > +! and power expansion. > + > +! { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } > +! { dg-final { cleanup-tree-dump "optimized" } } > Index: gcc/testsuite/gfortran.dg/reassoc_9.f > =================================================================== > --- gcc/testsuite/gfortran.dg/reassoc_9.f (revision 0) > +++ gcc/testsuite/gfortran.dg/reassoc_9.f (revision 0) > @@ -0,0 +1,17 @@ > +! { dg-do compile } > +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } > + > + SUBROUTINE S55199(P,Dvdph) > + implicit none > + real(8) :: c1,c2,c3,P,Dvdph > + c1=0.1d0 > + c2=0.2d0 > + c3=0.3d0 > + Dvdph = c1 + 2.*P**2*c2 + 3.*P**4*c3 > + END > + > +! There should be three multiplies following un-distribution > +! and power expansion. > + > +! { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } > +! { dg-final { cleanup-tree-dump "optimized" } } > Index: gcc/tree-ssa-reassoc.c > =================================================================== > --- gcc/tree-ssa-reassoc.c (revision 186495) > +++ gcc/tree-ssa-reassoc.c (working copy) > @@ -1020,6 +1020,98 @@ oecount_cmp (const void *p1, const void *p2) > return c1->id - c2->id; > } > > +/* Return TRUE iff STMT represents a builtin call that raises OP > + to some exponent. */ > + > +static bool > +stmt_is_power_of_op (gimple stmt, tree op) > +{ > + tree fndecl; > + > + if (!is_gimple_call (stmt)) > + return false; > + > + fndecl = gimple_call_fndecl (stmt); > + > + if (!fndecl > + || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) > + return false; > + > + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) > + { > + CASE_FLT_FN (BUILT_IN_POW): > + CASE_FLT_FN (BUILT_IN_POWI): > + return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); > + > + default: > + return false; > + } > +} > + > +/* Given STMT which is a __builtin_pow* call, decrement its exponent > + in place and return the result. Assumes that stmt_is_power_of_op > + was previously called for STMT and returned TRUE. */ > + > +static HOST_WIDE_INT > +decrement_power (gimple stmt) > +{ > + REAL_VALUE_TYPE c, cint; > + HOST_WIDE_INT power; > + tree arg1; > + > + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) > + { > + CASE_FLT_FN (BUILT_IN_POW): > + arg1 = gimple_call_arg (stmt, 1); > + c = TREE_REAL_CST (arg1); > + power = real_to_integer (&c) - 1; > + real_from_integer (&cint, VOIDmode, power, 0, 0); > + gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint)); > + return power; > + > + CASE_FLT_FN (BUILT_IN_POWI): > + arg1 = gimple_call_arg (stmt, 1); > + power = TREE_INT_CST_LOW (arg1) - 1; > + gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power)); > + return power; > + > + default: > + gcc_unreachable (); > + } > +} > + > +/* Find the single immediate use of STMT's LHS, and replace it > + with OP. Remove STMT. If STMT's LHS is the same as *DEF, > + replace *DEF with OP as well. */ > + > +static void > +propagate_op_to_single_use (tree op, gimple stmt, tree *def) > +{ > + tree lhs; > + gimple use_stmt; > + use_operand_p use; > + gimple_stmt_iterator gsi; > + > + if (is_gimple_call (stmt)) > + lhs = gimple_call_lhs (stmt); > + else > + lhs = gimple_assign_lhs (stmt); > + > + gcc_assert (has_single_use (lhs)); > + single_imm_use (lhs, &use, &use_stmt); > + if (lhs == *def) > + *def = op; > + SET_USE (use, op); > + if (TREE_CODE (op) != SSA_NAME) > + update_stmt (use_stmt); > + gsi = gsi_for_stmt (stmt); > + gsi_remove (&gsi, true); > + release_defs (stmt); > + > + if (is_gimple_call (stmt)) > + unlink_stmt_vdef (stmt); > +} > + > /* Walks the linear chain with result *DEF searching for an operation > with operand OP and code OPCODE removing that from the chain. *DEF > is updated if there is only one operand but no operation left. */ > @@ -1031,8 +1123,18 @@ zero_one_operation (tree *def, enum tree_code opco > > do > { > - tree name = gimple_assign_rhs1 (stmt); > + tree name; > > + if (opcode == MULT_EXPR > + && stmt_is_power_of_op (stmt, op)) > + { > + if (decrement_power (stmt) == 1) > + propagate_op_to_single_use (op, stmt, def); > + return; > + } > + > + name = gimple_assign_rhs1 (stmt); > + > /* If this is the operation we look for and one of the operands > is ours simply propagate the other operand into the stmts > single use. */ > @@ -1040,24 +1142,27 @@ zero_one_operation (tree *def, enum tree_code opco > && (name == op > || gimple_assign_rhs2 (stmt) == op)) > { > - gimple use_stmt; > - use_operand_p use; > - gimple_stmt_iterator gsi; > if (name == op) > name = gimple_assign_rhs2 (stmt); > - gcc_assert (has_single_use (gimple_assign_lhs (stmt))); > - single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt); > - if (gimple_assign_lhs (stmt) == *def) > - *def = name; > - SET_USE (use, name); > - if (TREE_CODE (name) != SSA_NAME) > - update_stmt (use_stmt); > - gsi = gsi_for_stmt (stmt); > - gsi_remove (&gsi, true); > - release_defs (stmt); > + propagate_op_to_single_use (name, stmt, def); > return; > } > > + /* We might have a multiply of two __builtin_pow* calls, and > + the operand might be hiding in the rightmost one. */ > + if (opcode == MULT_EXPR > + && gimple_assign_rhs_code (stmt) == opcode > + && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) > + { > + gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); > + if (stmt_is_power_of_op (stmt2, op)) > + { > + if (decrement_power (stmt2) == 1) > + propagate_op_to_single_use (op, stmt2, def); > + return; > + } > + } > + > /* Continue walking the chain. */ > gcc_assert (name != op > && TREE_CODE (name) == SSA_NAME); > @@ -1222,7 +1327,6 @@ undistribute_ops_list (enum tree_code opcode, > dcode = gimple_assign_rhs_code (oe1def); > if ((dcode != MULT_EXPR > && dcode != RDIV_EXPR) > - || oe1->count != 1 > || !is_reassociable_op (oe1def, dcode, loop)) > continue; > > @@ -1266,8 +1370,6 @@ undistribute_ops_list (enum tree_code opcode, > oecount c; > void **slot; > size_t idx; > - if (oe1->count != 1) > - continue; > c.oecode = oecode; > c.cnt = 1; > c.id = next_oecount_id++; > @@ -1336,7 +1438,7 @@ undistribute_ops_list (enum tree_code opcode, > > FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1) > { > - if (oe1->op == c->op && oe1->count == 1) > + if (oe1->op == c->op) > { > SET_BIT (candidates2, i); > ++nr_candidates2; > > > -- Richard Guenther <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer