Re: ivopts improvement

2011-08-25 Thread Tom de Vries
Hi Zdenek,

here's the updated version of the patch.

The goal is to remove the 'i' iterator from the following example, by
replacing 'i  n' with 'p  base + n'.

void
f (char *base, unsigned long int i, unsigned long int n)
{
  char *p = base + i;
  do
{
  *p = '\0';
  p++;
  i++;
   }
  while (i  n);
}

bootstrapped and reg-tested on x864_64, and build and reg-tested on MIPS.

I will sent a test-case in a separate email.

OK for trunk?

Thanks,
- Tom

2011-08-25  Zdenek Dvorak  o...@ucw.cz
Tom de Vries  t...@codesourcery.com

* tree-ssa-loop-ivopts.c (struct cost_pair): Add comp field.
(struct ivopts_data): Add loop_single_exit_p field.
(niter_for_exit): Change parameter desc_p into return value.  Return
desc if desc-may_be_zero.  Free desc if unused.
(niter_for_single_dom_exit): Change return type.
(find_induction_variables): Handle changed return type of
niter_for_single_dom_exit.  Dump may_be_zero.
(add_candidate_1): Keep original base and step type for IP_ORIGINAL.
(set_use_iv_cost): Add and handle comp parameter.
(determine_use_iv_cost_generic, determine_use_iv_cost_address): Add
comp argument to set_use_iv_cost.
(strip_wrap_conserving_type_conversions, expr_equal_p)
(difference_cannot_overflow_p, iv_elimination_compare_lt): New function.
(may_eliminate_iv): Add comp parameter.  Handle new return type of
niter_for_exit.  Use loop_single_exit_p.  Use iv_elimination_compare_lt.
(determine_use_iv_cost_condition): Add comp argument to set_use_iv_cost
and may_eliminate_iv.
(rewrite_use_compare): Move call to iv_elimination_compare to ...
(may_eliminate_iv): Here.
(tree_ssa_iv_optimize_loop): Initialize loop_single_exit_p.
Index: gcc/tree-ssa-loop-ivopts.c
===
--- gcc/tree-ssa-loop-ivopts.c	(revision 176554)
+++ gcc/tree-ssa-loop-ivopts.c	(working copy)
@@ -176,6 +176,7 @@ struct cost_pair
   tree value;		/* For final value elimination, the expression for
 			   the final value of the iv.  For iv elimination,
 			   the new bound to compare with.  */
+  enum tree_code comp;	/* For iv elimination, the comparison.  */
   int inv_expr_id;  /* Loop invariant expression id.  */
 };
 
@@ -297,6 +298,9 @@ struct ivopts_data
 
   /* Whether the loop body includes any function calls.  */
   bool body_includes_call;
+
+  /* Whether the loop body can only be exited via single exit.  */
+  bool loop_single_exit_p;
 };
 
 /* An assignment of iv candidates to uses.  */
@@ -770,15 +774,13 @@ contains_abnormal_ssa_name_p (tree expr)
   return false;
 }
 
-/*  Returns tree describing number of iterations determined from
+/*  Returns the structure describing number of iterations determined from
 EXIT of DATA-current_loop, or NULL if something goes wrong.  */
 
-static tree
-niter_for_exit (struct ivopts_data *data, edge exit,
-struct tree_niter_desc **desc_p)
+static struct tree_niter_desc *
+niter_for_exit (struct ivopts_data *data, edge exit)
 {
-  struct tree_niter_desc* desc = NULL;
-  tree niter;
+  struct tree_niter_desc *desc;
   void **slot;
 
   if (!data-niters)
@@ -791,37 +793,31 @@ niter_for_exit (struct ivopts_data *data
 
   if (!slot)
 {
-  /* Try to determine number of iterations.  We must know it
-	 unconditionally (i.e., without possibility of # of iterations
-	 being zero).  Also, we cannot safely work with ssa names that
-	 appear in phi nodes on abnormal edges, so that we do not create
-	 overlapping life ranges for them (PR 27283).  */
+  /* Try to determine number of iterations.  We cannot safely work with ssa
+ names that appear in phi nodes on abnormal edges, so that we do not
+ create overlapping life ranges for them (PR 27283).  */
   desc = XNEW (struct tree_niter_desc);
-  if (number_of_iterations_exit (data-current_loop,
- exit, desc, true)
-	   integer_zerop (desc-may_be_zero)
- 	   !contains_abnormal_ssa_name_p (desc-niter))
-	niter = desc-niter;
-  else
-	niter = NULL_TREE;
-
-  desc-niter = niter;
+  if (!number_of_iterations_exit (data-current_loop,
+  exit, desc, true)
+ 	  || contains_abnormal_ssa_name_p (desc-niter))
+	{
+	  XDELETE (desc);
+	  desc = NULL;
+	}
   slot = pointer_map_insert (data-niters, exit);
   *slot = desc;
 }
   else
-niter = ((struct tree_niter_desc *) *slot)-niter;
+desc = (struct tree_niter_desc *) *slot;
 
-  if (desc_p)
-*desc_p = (struct tree_niter_desc *) *slot;
-  return niter;
+  return desc;
 }
 
-/* Returns tree describing number of iterations determined from
+/* Returns the structure describing number of iterations determined from
single dominating exit of DATA-current_loop, or NULL if something
goes wrong.  */
 
-static tree
+static struct tree_niter_desc *
 

Re: ivopts improvement

2011-08-25 Thread Tom de Vries
On 08/25/2011 01:42 PM, Tom de Vries wrote:
 Hi Zdenek,
 
 here's the updated version of the patch.
 
 The goal is to remove the 'i' iterator from the following example, by
 replacing 'i  n' with 'p  base + n'.
 
 void
 f (char *base, unsigned long int i, unsigned long int n)
 {
   char *p = base + i;
   do
 {
   *p = '\0';
   p++;
   i++;
}
   while (i  n);
 }
 
 bootstrapped and reg-tested on x864_64, and build and reg-tested on MIPS.
 
 I will sent a test-case in a separate email.
 

OK for trunk?

2011-08-25  Tom de Vries  t...@codesourcery.com

* gcc.dg/tree-ssa/ivopts-lt.c: New test.
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
===
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c (revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-tree-ivopts } */
+
+void
+f1 (char *p, unsigned long int i, unsigned long int n)
+{
+  p += i;
+  do
+{
+  *p = '\0';
+  p += 1;
+  i++;
+}
+  while (i  n);
+}
+
+/* { dg-final { scan-tree-dump-times PHI 1 ivopts} } */
+/* { dg-final { scan-tree-dump-times PHI p_ 1 ivopts} } */
+/* { dg-final { scan-tree-dump-times p_\[0-9\]*  1 ivopts} } */
+/* { dg-final { cleanup-tree-dump ivopts } } */


Re: ivopts improvement

2011-08-25 Thread Zdenek Dvorak
Hi,

 here's the updated version of the patch.
 
 The goal is to remove the 'i' iterator from the following example, by
 replacing 'i  n' with 'p  base + n'.
 
 void
 f (char *base, unsigned long int i, unsigned long int n)
 {
   char *p = base + i;
   do
 {
   *p = '\0';
   p++;
   i++;
}
   while (i  n);
 }
 
 bootstrapped and reg-tested on x864_64, and build and reg-tested on MIPS.
 
 I will sent a test-case in a separate email.
 
 OK for trunk?

OK,

Zdenek


Re: ivopts improvement

2011-03-15 Thread Tom de Vries
Hi Zdenek,

I rewrote the patch to remove the use of use_uses_inced_iv.

On 03/14/2011 03:03 PM, Zdenek Dvorak wrote:
 Hi,
 
 (since the use_uses_inced_iv test is meaningless).

 To me it seems use_uses_inced_iv has meaning:
 - it models something: it states whether the comparison is using
   the iv increment result or the iv phi result.
 
 but that has nothing to do with the value of the iv.  For instance,
 in the following:
 
 a = phi (0, a')
 b = phi (1, b')
 c = phi (1, c')
 
 a' = a + 1;
 tmp = b;
 
 compare (a'/tmp/c, something)
 
 b' = tmp + 1;
 c' = c + 1;
 
 a', tmp and c are completely equivalent, yet your code for some reason claims
 to handle c and the other two differently.

That a' and c have the same value and the code handles them differently
does not necessarily mean that the code is incorrect.  The way I would
formulate it is that the code has implementation limitations, which are
expressed in an awkward way.

 tmp = b;

You're right, the calculation of use_uses_inced_iv assumed that it looks
at either the iv phi or the iv increment, so that was not safe.


I'll try to explain what my intention with the code is, by looking at a
pre-ivopt level example.

bb 2:
  p_5 = p_3(D) + i_4(D);

bb 3:
  # p_1 = PHI p_5(2), p_6(4)
  # i_2 = PHI i_4(D)(2), i_7(4)
  *p_1 = 0;
  p_6 = p_1 + 1;
  i_7 = i_2 + 1;
  if (i_7  n_8(D))
goto bb 4;
  else
goto bb 5;

bb 4:
  goto bb 3;

bb 5:
  return;

In this example, I try to analyze whether it is safe to replace
  i_7  n_8
by
  p_6  p_3 + n_8.

What I tried to do previously was to first prove a relation p_1 == i_2 +
ssa_name between i_2 and p_1, and then figure out (in the awkward code)
if I was allowed to assume p_6 == i_7 + ssa_name.

Now, I try to prove that relation between i_7 and p_6 instead, which
makes the code in iv_elimination_compare_lt simpler.

This has as casualty though that the 'int' case (the case that use-iv
is not compatible with sizetype) does not work anymore. That would need
more work.

The code still has the same limitation, meaning it will transform the
first 2 examples, but not the last 2 examples of the series of 4
mentioned earlier.

Changes compared to previous submission:
- changed name of fold_walk_def_plus into fold_diff_to_ssa_name
- added extra intelligence in fold_diff_to_ssa_name to deal with
  (a + x) - (b - x).
- don't calculate real_iv_use_base, calculate instead base_cand_at_use
  to simplify code

Thanks,
-Tom
diff -u gcc/tree-ssa-loop-ivopts.c gcc/tree-ssa-loop-ivopts.c
--- gcc/tree-ssa-loop-ivopts.c	(working copy)
+++ gcc/tree-ssa-loop-ivopts.c	(working copy)
@@ -419,6 +419,89 @@
   return fold_build2 (code, TREE_TYPE (a), a, b);
 }
 
+/* Folds (TREE_TYPE (A))(A CODE B), where CODE is either PLUS_EXPR or
+   MINUS_EXPR.  Returns the folded expression if folding is successful.
+   Otherwise, return NULL_TREE.  Handles the case that A is a pointer
+   robustly.  */
+
+static inline tree
+fold_plus (enum tree_code code, tree a, tree b)
+{
+  tree a_type = TREE_TYPE (a);
+  tree res;
+
+  STRIP_NOPS (a);
+  robust_plus (code, a, b);
+
+  res = fold_binary (code, TREE_TYPE (a), a, b);
+  if (res == NULL_TREE)
+return NULL_TREE;
+
+  return fold_convert (a_type, res);
+}
+
+/* Folds (TREE_TYPE (A))(A - B), possibly using the defining stmt of A.  Returns
+   the folded expression if folding is successful and resulted in an ssa_name.
+   Otherwise, return NULL_TREE.  */
+
+static inline tree
+fold_diff_to_ssa_name (tree a, tree b)
+{
+  tree a_type = TREE_TYPE (a);
+  tree res, a0, a1;
+  gimple def_stmt;
+
+  res = fold_plus (MINUS_EXPR, a, b);
+  if (res != NULL_TREE)
+{
+  STRIP_NOPS (res);
+  if (TREE_CODE (res) == SSA_NAME)
+	return fold_convert (a_type, res);
+}
+
+  STRIP_NOPS (a);
+  STRIP_NOPS (b);
+
+  if (TREE_CODE (a) == PLUS_EXPR  TREE_CODE (b) == PLUS_EXPR
+   TREE_OPERAND (a, 1) == TREE_OPERAND (b, 1))
+{
+  a = TREE_OPERAND (a, 0);
+  b = TREE_OPERAND (b, 0);
+
+  STRIP_NOPS (a);
+  STRIP_NOPS (b);
+
+  res = fold_plus (MINUS_EXPR, a, b);
+  if (res != NULL_TREE)
+	{
+	  STRIP_NOPS (res);
+	  if (TREE_CODE (res) == SSA_NAME)
+	return fold_convert (a_type, res);
+	}
+}
+
+  if (TREE_CODE (a) != SSA_NAME)
+return NULL_TREE;
+
+  def_stmt = SSA_NAME_DEF_STMT (a);
+  if (!is_gimple_assign (def_stmt)
+  || (gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
+	   gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR))
+return NULL_TREE;
+  a0 = gimple_assign_rhs1 (def_stmt);
+  a1 = gimple_assign_rhs2 (def_stmt);
+
+  res = fold_plus (MINUS_EXPR, fold_build_plus (PLUS_EXPR, a0, a1), b);
+  if (res != NULL_TREE)
+{
+  STRIP_NOPS (res);
+  if (TREE_CODE (res) == SSA_NAME)
+	return fold_convert (a_type, res);
+}
+
+  return NULL_TREE;
+}
+
 /* Number of uses recorded in DATA.  */
 
 static inline unsigned
@@ -825,17 +908,25 @@
 
   if (!slot)
 {
-  /* Try to determine number of iterations.  We must know it
-	 

Re: ivopts improvement

2011-03-14 Thread Zdenek Dvorak
Hi,

  +  /* Determine if the exit test is formulated in terms of the phi or the
  + increment of the use iv.  */
  +  use_uses_inced_iv
  += gimple_code (SSA_NAME_DEF_STMT (use-iv-ssa_name)) != GIMPLE_PHI;
  +
  +  /* Determine if the exit test is before or after the increment of the
  + cand.  */
  +  use_after_cand_inc
  += stmt_after_increment (data-current_loop, cand, use-stmt);
  +
  +  /* For now, we only handle these cases.  */
  +  if (use_after_cand_inc != use_uses_inced_iv)
  +return false;
  
  what is this trying to achieve?  USE_USES_INCED_IV is pretty much 
  meaningless --
  the value of USE does not depend in any way on whether it comes
  directly from
  a PHI node or from some other calculation.
 
 it is trying to allow for
 
 do
   {
 *p = '\0';
 i++; /* use_uses_inced_iv == true */
 p++; /* use_after_cand_inc == true */
 if (!(i  n))
   break;
   }
 while (1);
 
 and for
 
 do
   {
 *p = '\0';
 if (!(i  n))
   break;
 i++; /* use_uses_inced_iv == false */
 p++; /* use_after_cand_inc == false */
   }
 while (1);
 
 but not for
 
 do
   {
 *p = '\0';
 i++; /* use_uses_inced_iv == true */
 if (!(i  n))
   break;
 p++; /* use_after_cand_inc == false */
   }
 while (1);
 
 and not for
 
 do
   {
 *p = '\0';
 p++; /* use_after_cand_inc == true */
 if (!(i  n))
   break;
 i++; /* use_uses_inced_iv == false */
   }
 while (1);
 
 In the latter 2 cases, I cannot simply replace i  n with p  base + n.

Why not (other than that the value to compare with varies in these cases, but
it always is the value of p in the last iteration of the loop)?

One more thing: what is fold_walk_def_plus for?

Zdenek


Re: ivopts improvement

2011-03-14 Thread Zdenek Dvorak
Hi,

  it is trying to allow for
 
  do
{
  *p = '\0';
  i++; /* use_uses_inced_iv == true */
  p++; /* use_after_cand_inc == true */
  if (!(i  n))
break;
}
  while (1);
 
  and for
 
  do
{
  *p = '\0';
  if (!(i  n))
break;
  i++; /* use_uses_inced_iv == false */
  p++; /* use_after_cand_inc == false */
}
  while (1);
 
  but not for
 
  do
{
  *p = '\0';
  i++; /* use_uses_inced_iv == true */
  if (!(i  n))
break;
  p++; /* use_after_cand_inc == false */
}
  while (1);
 
  and not for
 
  do
{
  *p = '\0';
  p++; /* use_after_cand_inc == true */
  if (!(i  n))
break;
  i++; /* use_uses_inced_iv == false */
}
  while (1);
 
  In the latter 2 cases, I cannot simply replace i  n with p  base + n.
  
  Why not (other than that the value to compare with varies in these cases, 
  but
  it always is the value of p in the last iteration of the loop)?
 
 In the 2 latter cases, the value to compare with will be either base + n
 + 1 or base + n - 1. The code does not handle these cases yet.

well, if not, then it certainly handles one of the first two cases incorrectly
(since the use_uses_inced_iv test is meaningless).

  One more thing: what is fold_walk_def_plus for?
 
 It tries to fold a plus, and if not successful, finds ssa vars in
 operands of the plus, substitutes the defining statements of ssa vars
 for those ssa vars and retries folding.

Sorry for not being more clear; I meant, why is it used?

Zdenek


Re: ivopts improvement

2011-03-04 Thread Tom de Vries
On 03/04/2011 08:37 AM, Paolo Bonzini wrote:
 On 03/03/2011 03:28 PM, Tom de Vries wrote:
 reg-tested on x86_64. Better?
 
 Yes, very much so 

Great. Thanks for the review.

 (talking about patch 6.5; the other one is an 
 optimization but not essential based on the new comments).

 Just one question: what happens if the COND_EXPR is indeed turned into a 
 MAX_EXPR?  Is it a missed optimization?

It is. It will take some effort to get cost calculation for MAX_EXPR ok.

One additional problem (beside costs) that I observed also might need
fixing: a new bound (containing a MAX_EXPR) is generated for an inner
loop. The new bound is outer loop invariant. The MAX_EXPR however
expands into control flow, and is not hoisted out of the outer loop,
while the rest of the bound calculation is.

 Is it because of overflow that 
 you aren't _always_ creating a MAX_EXPR directly?

Indeed. The COND_EXPR created for my example is:
...
  (i + 1 = n) ? (~i + n) : 0
...
where i and n are unsigned int.

simplified:
...
  (i + 1 = n) ? (n - (i + 1)) : 0
...

The condition i + 1 = n precisely states the cases when n - (i + 1)
does not underflow.

Thanks,
- Tom


Re: ivopts improvement

2011-03-04 Thread Paolo Bonzini

On 03/04/2011 02:57 PM, Tom de Vries wrote:

The MAX_EXPR however
expands into control flow, and is not hoisted out of the outer loop,
while the rest of the bound calculation is.


That looks like a pass-ordering problem too (both on the tree level, for 
ivopts versus invariant motion, and on the RTL level where predicated 
instructions are created after invariant motion).


I just noticed that ARM doesn't have named {s,u}{min,max} patterns. 
Perhaps adding those helps hoisting the control-flow out of the loop in 
your case.


Paolo


Re: ivopts improvement

2011-03-04 Thread Zdenek Dvorak
Hi,

/* Whether the loop body includes any function calls.  */
bool body_includes_call;
 +
 +  /* Whether the loop body includes any function calls that possibly have 
 side
 + effects.  */
 +  bool body_includes_side_effect_call;
  };
  
  /* An assignment of iv candidates to uses.  */
 @@ -456,6 +460,20 @@
return exit;
  }
  
 +/* Returns true if single_exit (DATA-current_loop) is the only possible 
 exit.
 +   Uses the same logic as loop_only_exit_p.  */

why are you duplicating the functionality, instead of simply caching the result
of loop_only_exit_p?

 +/* Tries to detect
 + NIT == (use_iv_max  USE-iv-base)
 +? 0
 +: (use_iv_max - USE-iv-base)
 +   where
 + use_iv_real_base == (USE-iv-base - USE-iv-step)
 +  CAND-iv-base == base_ptr + use_iv_real_base
 +   and returns the exclusive upper bound for CAND-var_after:
 + base_ptr + use_iv_max.  */
 +
 +static tree
 +get_lt_bound (struct iv_use *use, struct iv_cand *cand, tree nit)
 +{
...
 +  /* use_iv_real_base == use-iv-base - use-iv-step.  */
 +  use_iv_real_base = fold_build_plus (MINUS_EXPR, use-iv-base, 
 use-iv-step);
 +
 +  /* cand_iv_base.  */
 +
 +  /* cand-iv-base == base_ptr + use_iv_real_base.  */
...
 +  /* 0.  */
...

This function seriously needs better comments.  All that are currently present 
just
give relations between variables that can be as easily seen from the code (but
do not at all explain what the variables are supposed to mean), or make no sense
(what does the 0. comment mean?)

Otherwise the patch looks ok (but I would still like to see get_lt_bound with 
proper
comments, currently I don't really understand what happens there),

Zdenek