Re: [PATCH] Allow un-distribution with repeated factors (PR52976 follow-up)

2012-04-18 Thread Richard Guenther
On Tue, 17 Apr 2012, William J. Schmidt wrote:

> The emergency reassociation patch for PR52976 disabled un-distribution
> in the presence of repeated factors to avoid ICEs in zero_one_operation.
> This patch fixes such cases properly by teaching zero_one_operation
> about __builtin_pow* calls.
> 
> Bootstrapped with no new regressions on powerpc64-linux.  Also built
> SPEC cpu2000 and cpu2006 successfully.  Ok for trunk?

Ok.

Thanks,
Richard

> Thanks,
> Bill
> 
> 
> gcc:
> 
> 2012-04-17  Bill Schmidt  
> 
>   * tree-ssa-reassoc.c (stmt_is_power_of_op): New function.
>   (decrement_power): Likewise.
>   (propagate_op_to_single_use): Likewise.
>   (zero_one_operation): Handle __builtin_pow* calls in linearized
>   expression trees; factor logic into propagate_op_to_single_use.
>   (undistribute_ops_list): Allow operands with repeat counts > 1.
> 
> 
> gcc/testsuite:
> 
> 2012-04-17  Bill Schmidt  
> 
>   gfortran.dg/reassoc_7.f: New test.
>   gfortran.dg/reassoc_8.f: Likewise.
>   gfortran.dg/reassoc_9.f: Likewise.
>   gfortran.dg/reassoc_10.f: Likewise.
> 
> 
> Index: gcc/testsuite/gfortran.dg/reassoc_10.f
> ===
> --- gcc/testsuite/gfortran.dg/reassoc_10.f(revision 0)
> +++ gcc/testsuite/gfortran.dg/reassoc_10.f(revision 0)
> @@ -0,0 +1,17 @@
> +! { dg-do compile }
> +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" }
> +
> +  SUBROUTINE S55199(P,Q,Dvdph)
> +  implicit none
> +  real(8) :: c1,c2,c3,P,Q,Dvdph
> +  c1=0.1d0
> +  c2=0.2d0
> +  c3=0.3d0
> +  Dvdph = c1 + 2.*P*c2 + 3.*P**2*Q**3*c3
> +  END
> +
> +! There should be five multiplies following un-distribution
> +! and power expansion.
> +
> +! { dg-final { scan-tree-dump-times " \\\* " 5 "optimized" } }
> +! { dg-final { cleanup-tree-dump "optimized" } }
> Index: gcc/testsuite/gfortran.dg/reassoc_7.f
> ===
> --- gcc/testsuite/gfortran.dg/reassoc_7.f (revision 0)
> +++ gcc/testsuite/gfortran.dg/reassoc_7.f (revision 0)
> @@ -0,0 +1,16 @@
> +! { dg-do compile }
> +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" }
> +
> +  SUBROUTINE S55199(P,Dvdph)
> +  implicit none
> +  real(8) :: c1,c2,c3,P,Dvdph
> +  c1=0.1d0
> +  c2=0.2d0
> +  c3=0.3d0
> +  Dvdph = c1 + 2.*P*c2 + 3.*P**2*c3
> +  END
> +
> +! There should be two multiplies following un-distribution.
> +
> +! { dg-final { scan-tree-dump-times " \\\* " 2 "optimized" } }
> +! { dg-final { cleanup-tree-dump "optimized" } }
> Index: gcc/testsuite/gfortran.dg/reassoc_8.f
> ===
> --- gcc/testsuite/gfortran.dg/reassoc_8.f (revision 0)
> +++ gcc/testsuite/gfortran.dg/reassoc_8.f (revision 0)
> @@ -0,0 +1,17 @@
> +! { dg-do compile }
> +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" }
> +
> +  SUBROUTINE S55199(P,Dvdph)
> +  implicit none
> +  real(8) :: c1,c2,c3,P,Dvdph
> +  c1=0.1d0
> +  c2=0.2d0
> +  c3=0.3d0
> +  Dvdph = c1 + 2.*P**2*c2 + 3.*P**3*c3
> +  END
> +
> +! There should be three multiplies following un-distribution
> +! and power expansion.
> +
> +! { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } }
> +! { dg-final { cleanup-tree-dump "optimized" } }
> Index: gcc/testsuite/gfortran.dg/reassoc_9.f
> ===
> --- gcc/testsuite/gfortran.dg/reassoc_9.f (revision 0)
> +++ gcc/testsuite/gfortran.dg/reassoc_9.f (revision 0)
> @@ -0,0 +1,17 @@
> +! { dg-do compile }
> +! { dg-options "-O3 -ffast-math -fdump-tree-optimized" }
> +
> +  SUBROUTINE S55199(P,Dvdph)
> +  implicit none
> +  real(8) :: c1,c2,c3,P,Dvdph
> +  c1=0.1d0
> +  c2=0.2d0
> +  c3=0.3d0
> +  Dvdph = c1 + 2.*P**2*c2 + 3.*P**4*c3
> +  END
> +
> +! There should be three multiplies following un-distribution
> +! and power expansion.
> +
> +! { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } }
> +! { dg-final { cleanup-tree-dump "optimized" } }
> Index: gcc/tree-ssa-reassoc.c
> ===
> --- gcc/tree-ssa-reassoc.c(revision 186495)
> +++ gcc/tree-ssa-reassoc.c(working copy)
> @@ -1020,6 +1020,98 @@ oecount_cmp (const void *p1, const void *p2)
>  return c1->id - c2->id;
>  }
>  
> +/* Return TRUE iff STMT represents a builtin call that raises OP
> +   to some exponent.  */
> +
> +static bool
> +stmt_is_power_of_op (gimple stmt, tree op)
> +{
> +  tree fndecl;
> +
> +  if (!is_gimple_call (stmt))
> +return false;
> +
> +  fndecl = gimple_call_fndecl (stmt);
> +
> +  if (!fndecl
> +  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
> +return false;
> +
> +  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
> +{
> +CASE_FLT_FN (BUILT_IN_POW):
> +CASE_FLT_F

[PATCH] Allow un-distribution with repeated factors (PR52976 follow-up)

2012-04-17 Thread William J. Schmidt
The emergency reassociation patch for PR52976 disabled un-distribution
in the presence of repeated factors to avoid ICEs in zero_one_operation.
This patch fixes such cases properly by teaching zero_one_operation
about __builtin_pow* calls.

Bootstrapped with no new regressions on powerpc64-linux.  Also built
SPEC cpu2000 and cpu2006 successfully.  Ok for trunk?

Thanks,
Bill


gcc:

2012-04-17  Bill Schmidt  

* tree-ssa-reassoc.c (stmt_is_power_of_op): New function.
(decrement_power): Likewise.
(propagate_op_to_single_use): Likewise.
(zero_one_operation): Handle __builtin_pow* calls in linearized
expression trees; factor logic into propagate_op_to_single_use.
(undistribute_ops_list): Allow operands with repeat counts > 1.


gcc/testsuite:

2012-04-17  Bill Schmidt  

gfortran.dg/reassoc_7.f: New test.
gfortran.dg/reassoc_8.f: Likewise.
gfortran.dg/reassoc_9.f: Likewise.
gfortran.dg/reassoc_10.f: Likewise.


Index: gcc/testsuite/gfortran.dg/reassoc_10.f
===
--- gcc/testsuite/gfortran.dg/reassoc_10.f  (revision 0)
+++ gcc/testsuite/gfortran.dg/reassoc_10.f  (revision 0)
@@ -0,0 +1,17 @@
+! { dg-do compile }
+! { dg-options "-O3 -ffast-math -fdump-tree-optimized" }
+
+  SUBROUTINE S55199(P,Q,Dvdph)
+  implicit none
+  real(8) :: c1,c2,c3,P,Q,Dvdph
+  c1=0.1d0
+  c2=0.2d0
+  c3=0.3d0
+  Dvdph = c1 + 2.*P*c2 + 3.*P**2*Q**3*c3
+  END
+
+! There should be five multiplies following un-distribution
+! and power expansion.
+
+! { dg-final { scan-tree-dump-times " \\\* " 5 "optimized" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
Index: gcc/testsuite/gfortran.dg/reassoc_7.f
===
--- gcc/testsuite/gfortran.dg/reassoc_7.f   (revision 0)
+++ gcc/testsuite/gfortran.dg/reassoc_7.f   (revision 0)
@@ -0,0 +1,16 @@
+! { dg-do compile }
+! { dg-options "-O3 -ffast-math -fdump-tree-optimized" }
+
+  SUBROUTINE S55199(P,Dvdph)
+  implicit none
+  real(8) :: c1,c2,c3,P,Dvdph
+  c1=0.1d0
+  c2=0.2d0
+  c3=0.3d0
+  Dvdph = c1 + 2.*P*c2 + 3.*P**2*c3
+  END
+
+! There should be two multiplies following un-distribution.
+
+! { dg-final { scan-tree-dump-times " \\\* " 2 "optimized" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
Index: gcc/testsuite/gfortran.dg/reassoc_8.f
===
--- gcc/testsuite/gfortran.dg/reassoc_8.f   (revision 0)
+++ gcc/testsuite/gfortran.dg/reassoc_8.f   (revision 0)
@@ -0,0 +1,17 @@
+! { dg-do compile }
+! { dg-options "-O3 -ffast-math -fdump-tree-optimized" }
+
+  SUBROUTINE S55199(P,Dvdph)
+  implicit none
+  real(8) :: c1,c2,c3,P,Dvdph
+  c1=0.1d0
+  c2=0.2d0
+  c3=0.3d0
+  Dvdph = c1 + 2.*P**2*c2 + 3.*P**3*c3
+  END
+
+! There should be three multiplies following un-distribution
+! and power expansion.
+
+! { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
Index: gcc/testsuite/gfortran.dg/reassoc_9.f
===
--- gcc/testsuite/gfortran.dg/reassoc_9.f   (revision 0)
+++ gcc/testsuite/gfortran.dg/reassoc_9.f   (revision 0)
@@ -0,0 +1,17 @@
+! { dg-do compile }
+! { dg-options "-O3 -ffast-math -fdump-tree-optimized" }
+
+  SUBROUTINE S55199(P,Dvdph)
+  implicit none
+  real(8) :: c1,c2,c3,P,Dvdph
+  c1=0.1d0
+  c2=0.2d0
+  c3=0.3d0
+  Dvdph = c1 + 2.*P**2*c2 + 3.*P**4*c3
+  END
+
+! There should be three multiplies following un-distribution
+! and power expansion.
+
+! { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
Index: gcc/tree-ssa-reassoc.c
===
--- gcc/tree-ssa-reassoc.c  (revision 186495)
+++ gcc/tree-ssa-reassoc.c  (working copy)
@@ -1020,6 +1020,98 @@ oecount_cmp (const void *p1, const void *p2)
 return c1->id - c2->id;
 }
 
+/* Return TRUE iff STMT represents a builtin call that raises OP
+   to some exponent.  */
+
+static bool
+stmt_is_power_of_op (gimple stmt, tree op)
+{
+  tree fndecl;
+
+  if (!is_gimple_call (stmt))
+return false;
+
+  fndecl = gimple_call_fndecl (stmt);
+
+  if (!fndecl
+  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
+return false;
+
+  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
+{
+CASE_FLT_FN (BUILT_IN_POW):
+CASE_FLT_FN (BUILT_IN_POWI):
+  return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
+  
+default:
+  return false;
+}
+}
+
+/* Given STMT which is a __builtin_pow* call, decrement its exponent
+   in place and return the result.  Assumes that stmt_is_power_of_op
+   was previously called for STMT and returne