This enhances constant folding for division by complex and vector
constants.  When -freciprocal-math is present, such divisions are
converted into multiplies by the constant reciprocal.  Even without that
flag, when optimizing, division by a vector constant is converted if
every element has an exact reciprocal.  I did not implement logic for
exact reciprocals of complex constants because either (a) the likelihood
of occurrence doesn't justify the complexity, or (b) I'm lazy.  Your
choice. ;)

Bootstrapped with no new regressions on powerpc64-unknown-linux-gnu.  Ok
for trunk?

Thanks,
Bill


gcc:

2012-04-19  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        PR rtl-optimization/44214
        * fold-const.c (exact_inverse): New function.
        (fold_binary_loc): Fold vector and complex division by constant into
        multiply by reciprocal with flag_reciprocal_math; fold vector division
        by constant into multiply by reciprocal with exact inverse.

gcc/testsuite:

2012-04-19  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        PR rtl-optimization/44214
        * gcc.target/powerpc/pr44214-1.c: New test.
        * gcc.dg/pr44214-2.c: Likewise.
        * gcc.target/powerpc/pr44214-3.c: Likewise.


Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c    (revision 186573)
+++ gcc/fold-const.c    (working copy)
@@ -9693,6 +9693,48 @@ fold_addr_of_array_ref_difference (location_t loc,
   return NULL_TREE;
 }
 
+/* If the real or vector real constant CST of type TYPE has an exact
+   inverse, return it, else return NULL.  */
+
+static tree
+exact_inverse (tree type, tree cst)
+{
+  REAL_VALUE_TYPE r;
+  tree unit_type, *elts;
+  enum machine_mode mode;
+  unsigned vec_nelts, i;
+
+  switch (TREE_CODE (cst))
+    {
+    case REAL_CST:
+      r = TREE_REAL_CST (cst);
+
+      if (exact_real_inverse (TYPE_MODE (type), &r))
+       return build_real (type, r);
+
+      return NULL_TREE;
+
+    case VECTOR_CST:
+      vec_nelts = VECTOR_CST_NELTS (cst);
+      elts = XALLOCAVEC (tree, vec_nelts);
+      unit_type = TREE_TYPE (type);
+      mode = TYPE_MODE (unit_type);
+
+      for (i = 0; i < vec_nelts; i++)
+       {
+         r = TREE_REAL_CST (VECTOR_CST_ELT (cst, i));
+         if (!exact_real_inverse (mode, &r))
+           return NULL_TREE;
+         elts[i] = build_real (unit_type, r);
+       }
+
+      return build_vector (type, elts);
+
+    default:
+      return NULL_TREE;
+    }
+}
+
 /* Fold a binary expression of code CODE and type TYPE with operands
    OP0 and OP1.  LOC is the location of the resulting expression.
    Return the folded expression if folding is successful.  Otherwise,
@@ -11734,23 +11776,25 @@ fold_binary_loc (location_t loc,
         so only do this if -freciprocal-math.  We can actually
         always safely do it if ARG1 is a power of two, but it's hard to
         tell if it is or not in a portable manner.  */
-      if (TREE_CODE (arg1) == REAL_CST)
+      if (TREE_CODE (arg1) == REAL_CST
+         || (TREE_CODE (arg1) == COMPLEX_CST
+             && COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg1)))
+         || (TREE_CODE (arg1) == VECTOR_CST
+             && VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg1))))
        {
          if (flag_reciprocal_math
-             && 0 != (tem = const_binop (code, build_real (type, dconst1),
+             && 0 != (tem = fold_binary (code, type, build_one_cst (type),
                                          arg1)))
            return fold_build2_loc (loc, MULT_EXPR, type, arg0, tem);
-         /* Find the reciprocal if optimizing and the result is exact.  */
-         if (optimize)
+         /* Find the reciprocal if optimizing and the result is exact.
+            TODO: Complex reciprocal not implemented.  */
+         if (optimize
+             && TREE_CODE (arg1) != COMPLEX_CST)
            {
-             REAL_VALUE_TYPE r;
-             r = TREE_REAL_CST (arg1);
-             if (exact_real_inverse (TYPE_MODE(TREE_TYPE(arg0)), &r))
-               {
-                 tem = build_real (type, r);
-                 return fold_build2_loc (loc, MULT_EXPR, type,
-                                     fold_convert_loc (loc, type, arg0), tem);
-               }
+             tree inverse = exact_inverse (TREE_TYPE (arg0), arg1);
+
+             if (inverse)
+               return fold_build2_loc (loc, MULT_EXPR, type, arg0, inverse);
            }
        }
       /* Convert A/B/C to A/(B*C).  */
Index: gcc/testsuite/gcc.target/powerpc/pr44214-3.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr44214-3.c        (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr44214-3.c        (revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=power7 -fdump-tree-optimized" } */
+
+void do_div (vector double *a, vector double *b)
+{
+  *a = *b / (vector double) { 2.0, 2.0 };
+}
+
+/* Since 2.0 has an exact reciprocal, constant folding should multiply *b
+   by the reciprocals of the vector elements.  As a result there should be
+   one vector multiply and zero divides in the optimized code.  The string
+   " * " occurs 3 times: one multiply and two indirect parameters.  */
+
+/* { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/testsuite/gcc.target/powerpc/pr44214-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr44214-1.c        (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr44214-1.c        (revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -mcpu=power7 -fdump-tree-optimized" } */
+
+void do_div (vector double *a, vector double *b)
+{
+  *a = *b / (vector double) { 2.0, 3.0 };
+}
+
+/* Constant folding should multiply *b by the reciprocals of the
+   vector elements.  As a result there should be one vector multiply
+   and zero divides in the optimized code.  The string " * " occurs
+   3 times: one multiply and two indirect parameters.  */
+
+/* { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/testsuite/gcc.dg/pr44214-2.c
===================================================================
--- gcc/testsuite/gcc.dg/pr44214-2.c    (revision 0)
+++ gcc/testsuite/gcc.dg/pr44214-2.c    (revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
+
+void do_div (_Complex double *a, _Complex double *b)
+{
+  *a = *b / (4.0 - 5.0fi);
+}
+
+/* Constant folding should multiply *b by the reciprocal of 4-5i
+   = 4/41 + (5/41)i.  As a result there should be 4 multiplies and
+   zero divides in the optimized code.  The string " * " occurs 6
+   times: 4 multiplies and 2 indirect parameters.  */
+
+/* { dg-final { scan-tree-dump-times " \\\* " 6 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
 

Reply via email to