Hi all,

The patch implements the some of the division optimizations discussed in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71026 .

We now reassociate (as discussed in the bug report):

    x / (y * y) -> x  * (1 / y) * (1 / y)

If it is reasonable to do so. This is done with
-funsafe-math-optimizations.

Bootstrapped and regtested with part (1/2). OK for trunk?

Jackson

gcc/

2017-08-03  Jackson Woodruff  <jackson.woodr...@arm.com>

        PR 71026/tree-optimization
        * tree-ssa-math-opts (is_division_by_square,
        is_square_of, insert_sqaure_reciprocals): New.
        (insert_reciprocals): Change to insert reciprocals
        before a division by a square.
        (execute_cse_reciprocals_1): Change to consider
        division by a square.


gcc/testsuite

2017-08-03  Jackson Woodruff  <jackson.woodr...@arm.com>

        PR 71026/tree-optimization
        * gcc.dg/associate_division_1.c: New.

diff --git a/gcc/testsuite/gcc.dg/associate_division_1.c 
b/gcc/testsuite/gcc.dg/associate_division_1.c
new file mode 100644
index 
0000000000000000000000000000000000000000..187d3597af8825a6a43f29bfaa68b089d2d5bbfe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/associate_division_1.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-optimized" } */
+
+float
+extract_square (float x, float y, float *a, float *b)
+{
+  *a = 3 / (y * y);
+  *b = 5 / (y * y);
+
+  return x / (y * y);
+}
+
+/* Don't expect the 'powmult' (calculation of y * y)
+   to be deleted until a later pass, so look for one
+   more multiplication than strictly necessary.  */
+float
+extract_recip (float *w, float x, float y, float z)
+{
+  *w = 7 / y;
+
+  return x / (y * y) + z / y;
+}
+
+float
+neg_recip (float x, float y, float z)
+{
+  return (x / y) + (z / -y);
+}
+
+float
+const_divisor (float *w, float x, float y, float z)
+{
+  *w = 5 / y;
+  return x / (y * 3.0f) + z / y;
+}
+
+/* 4 For the pointers to a, b, w and w, 4 multiplications in 'extract_square',
+   5 multiplications in 'extract_recip', 0 multiplications
+   in 'neg_recip', 3 multiplcations in 'const_divisor' expected.  */
+/* { dg-final { scan-tree-dump-times " \\* " 14 "optimized" } } */
+
+/* 1 division in 'extract_square', 1 division in 'extract_recip',
+   1 division in 'neg_recip', 1 division in 'const_divisor'.  */
+/* { dg-final { scan-tree-dump-times " / " 4 "optimized" } } */
+
+
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 
7ac1659fa0670b7080685f3f9513939807073a63..0db160f68001ffb90942c4002703625430128b9f
 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -340,6 +340,38 @@ is_division_by (gimple *use_stmt, tree def)
         && gimple_assign_rhs1 (use_stmt) != def;
 }
 
+/* Return whether USE_STMT is DEF * DEF.  */
+static inline bool
+is_square_of (gimple *use_stmt, tree def)
+{
+  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
+      && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
+    {
+      tree op0 = gimple_assign_rhs1 (use_stmt);
+      tree op1 = gimple_assign_rhs2 (use_stmt);
+
+      return op0 == op1 && op0 == def;
+    }
+  return 0;
+}
+
+/* Return whether USE_STMT is a floating-point division by
+   DEF * DEF.  */
+static inline bool
+is_division_by_square (gimple *use_stmt, tree def)
+{
+  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
+      && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR)
+    {
+      tree denominator = gimple_assign_rhs2 (use_stmt);
+      if (TREE_CODE (denominator) == SSA_NAME)
+       {
+         return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
+       }
+    }
+  return 0;
+}
+
 /* Walk the subset of the dominator tree rooted at OCC, setting the
    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
    the given basic block.  The field may be left NULL, of course,
@@ -369,14 +401,16 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct 
occurrence *occ,
                                      build_one_cst (type), def);
 
       if (occ->bb_has_division)
-        {
-          /* Case 1: insert before an existing division.  */
-          gsi = gsi_after_labels (occ->bb);
-          while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
+       {
+         /* Case 1: insert before an existing division.  */
+         gsi = gsi_after_labels (occ->bb);
+         while (!gsi_end_p (gsi)
+                && (!is_division_by (gsi_stmt (gsi), def))
+                && (!is_division_by_square (gsi_stmt (gsi), def)))
            gsi_next (&gsi);
 
-          gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
-        }
+         gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+       }
       else if (def_gsi && occ->bb == def_gsi->bb)
         {
           /* Case 2: insert right after the definition.  Note that this will
@@ -403,6 +437,80 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct 
occurrence *occ,
 }
 
 
+/* Walk the tree until we find the first division by a square.  Insert
+   the definition of the reciprocal there.  This returns that definition,
+   or if there is an alternate definition earlier, then it returns that
+   instead.  */
+
+static tree
+insert_square_reciprocals (struct occurrence *occ, tree def)
+{
+  gimple_stmt_iterator gsi = gsi_after_labels (occ->bb);
+
+  while (!gsi_end_p (gsi)
+        && !is_division_by (gsi_stmt (gsi), def)
+        /* Check to see if a calculation statement has already
+           been inserted.  */
+        && !is_square_of (gsi_stmt (gsi), occ->recip_def))
+    gsi_next (&gsi);
+
+  if (gsi_end_p (gsi))
+      return NULL;
+  else if (is_square_of (gsi_stmt (gsi), occ->recip_def))
+      return gimple_assign_lhs (gsi_stmt (gsi));
+  else
+    {
+      tree type = TREE_TYPE (def);
+      tree recip_square_def = create_tmp_reg (type, "powmult_reciptmp");
+      gassign *new_stmt = gimple_build_assign (recip_square_def, MULT_EXPR,
+                                              occ->recip_def, occ->recip_def);
+      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+      return recip_square_def;
+    }
+}
+
+/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
+   Unlike replace_reciprocals, we expect use_p to be a square definition
+   (i.e. use_p is _ = x * x).  Iterate though all uses of this square
+   and replace them.  */
+static inline void
+replace_reciprocal_squares (use_operand_p use_p)
+{
+  gimple *use_stmt = USE_STMT (use_p);
+  basic_block bb = gimple_bb (use_stmt);
+  struct occurrence *occ = (struct occurrence *) bb->aux;
+  imm_use_iterator use_iter;
+  tree def_name = gimple_assign_lhs (use_stmt);
+  use_operand_p square_use_p;
+  tree squared_reciprocal = insert_square_reciprocals (occ, def_name);
+
+  if (optimize_bb_for_speed_p (bb) && squared_reciprocal
+      && occ->recip_def && use_stmt != occ->recip_def_stmt)
+    {
+
+      /* Find all occurrences of the use name and replace
+        them by multiplications of this new inverse.  */
+      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def_name)
+       {
+         FOR_EACH_IMM_USE_ON_STMT (square_use_p, use_iter)
+           {
+             gimple *square_use = USE_STMT (square_use_p);
+
+             if (gimple_assign_rhs_code (square_use) == RDIV_EXPR)
+               {
+                 gimple_assign_set_rhs_code (square_use, MULT_EXPR);
+                 gimple_assign_set_rhs2 (square_use, squared_reciprocal);
+                 SET_USE (square_use_p, squared_reciprocal);
+
+                 reciprocal_stats.rdivs_inserted++;
+                 update_stmt (square_use);
+               }
+           }
+       }
+    }
+}
+
+
 /* Replace the division at USE_P with a multiplication by the reciprocal, if
    possible.  */
 
@@ -460,10 +568,12 @@ free_bb (struct occurrence *occ)
 static void
 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 {
-  use_operand_p use_p;
-  imm_use_iterator use_iter;
+  use_operand_p use_p, square_use_p;
+  imm_use_iterator use_iter, square_use_iter;
+  tree square_def;
   struct occurrence *occ;
   int count = 0, threshold;
+  int square_recip_count = 0;
 
   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
 
@@ -475,11 +585,26 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, 
tree def)
          register_division_in (gimple_bb (use_stmt));
          count++;
        }
+
+      if (is_square_of (use_stmt, def))
+       {
+         square_def = gimple_assign_lhs (use_stmt);
+         FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
+           {
+             gimple *square_use_stmt = USE_STMT (square_use_p);
+             if (is_division_by (square_use_stmt, square_def))
+               {
+                 register_division_in (gimple_bb (square_use_stmt));
+                 square_recip_count++;
+               }
+           }
+       }
     }
 
   /* Do the expensive part only if we can hope to optimize something.  */
   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE 
(def)));
-  if (count >= threshold)
+  if (count + square_recip_count >= threshold
+      && count >= 1)
     {
       gimple *use_stmt;
       for (occ = occ_head; occ; occ = occ->next)
@@ -495,6 +620,11 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, 
tree def)
              FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
                replace_reciprocal (use_p);
            }
+         else if (is_square_of (use_stmt, def))
+           {
+             FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
+               replace_reciprocal_squares (use_p);
+           }
        }
     }
 

Reply via email to