Re: Fix profile updating after outer loop unswitching

2017-02-14 Thread Richard Biener
On Tue, Feb 14, 2017 at 12:22 PM, Martin Liška  wrote:
> On 02/05/2017 06:28 PM, Jan Hubicka wrote:
>> +  /* ... finally scale everything in the loop except for guarded basic 
>> blocks
>> + where profile does not change.  */
>> +  basic_block *body = get_loop_body (loop);
>
> Hello.
>
> This hunk causes a new memory leak:
>
> ==24882== 64 bytes in 1 blocks are definitely lost in loss record 328 of 892
>
> ==24882==at 0x4C29110: malloc (in 
> /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
>
> ==24882==by 0x115DFF7: xmalloc (xmalloc.c:147)
>
> ==24882==by 0x6FADCC: get_loop_body(loop const*) (cfgloop.c:834)
>
> ==24882==by 0xB77520: hoist_guard (tree-ssa-loop-unswitch.c:881)
>
> ==24882==by 0xB77520: tree_unswitch_outer_loop 
> (tree-ssa-loop-unswitch.c:536)
>
> ==24882==by 0xB77520: tree_ssa_unswitch_loops() 
> (tree-ssa-loop-unswitch.c:104)
>
> ==24882==by 0x99C65E: execute_one_pass(opt_pass*) (passes.c:2465)
>
> ==24882==by 0x99CE17: execute_pass_list_1(opt_pass*) (passes.c:2554)
>
> ==24882==by 0x99CE29: execute_pass_list_1(opt_pass*) (passes.c:2555)
>
> ==24882==by 0x99CE29: execute_pass_list_1(opt_pass*) (passes.c:2555)
>
> ==24882==by 0x99CE74: execute_pass_list(function*, opt_pass*) 
> (passes.c:2565)
>
> ==24882==by 0x71E745: cgraph_node::expand() (cgraphunit.c:2038)
>
> ==24882==by 0x71FCC3: expand_all_functions (cgraphunit.c:2174)
>
> ==24882==by 0x71FCC3: symbol_table::compile() (cgraphunit.c:2531)
>
> ==24882==by 0x7214DB: symbol_table::finalize_compilation_unit() 
> (cgraphunit.c:2621)
>
> ==24882==by 0xA5B3AB: compile_file() (toplev.c:492)
>
> ==24882==by 0x5F3A78: do_compile (toplev.c:1984)
>
> ==24882==by 0x5F3A78: toplev::main(int, char**) (toplev.c:2118)
>
> ==24882==by 0x5F5826: main (main.c:39)
>
>
> Fixed in attached patch that can bootstrap on ppc64le-redhat-linux and 
> survives regression tests.
>
> Ready to be installed?

Ok.

Thanks,
Richard.

> Martin
>


Re: Fix profile updating after outer loop unswitching

2017-02-14 Thread Martin Liška
On 02/05/2017 06:28 PM, Jan Hubicka wrote:
> +  /* ... finally scale everything in the loop except for guarded basic blocks
> + where profile does not change.  */
> +  basic_block *body = get_loop_body (loop);

Hello.

This hunk causes a new memory leak:

==24882== 64 bytes in 1 blocks are definitely lost in loss record 328 of 892

==24882==at 0x4C29110: malloc (in 
/usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)

==24882==by 0x115DFF7: xmalloc (xmalloc.c:147)

==24882==by 0x6FADCC: get_loop_body(loop const*) (cfgloop.c:834)

==24882==by 0xB77520: hoist_guard (tree-ssa-loop-unswitch.c:881)

==24882==by 0xB77520: tree_unswitch_outer_loop 
(tree-ssa-loop-unswitch.c:536)

==24882==by 0xB77520: tree_ssa_unswitch_loops() 
(tree-ssa-loop-unswitch.c:104)

==24882==by 0x99C65E: execute_one_pass(opt_pass*) (passes.c:2465)

==24882==by 0x99CE17: execute_pass_list_1(opt_pass*) (passes.c:2554)

==24882==by 0x99CE29: execute_pass_list_1(opt_pass*) (passes.c:2555)

==24882==by 0x99CE29: execute_pass_list_1(opt_pass*) (passes.c:2555)

==24882==by 0x99CE74: execute_pass_list(function*, opt_pass*) 
(passes.c:2565)

==24882==by 0x71E745: cgraph_node::expand() (cgraphunit.c:2038)

==24882==by 0x71FCC3: expand_all_functions (cgraphunit.c:2174)

==24882==by 0x71FCC3: symbol_table::compile() (cgraphunit.c:2531)

==24882==by 0x7214DB: symbol_table::finalize_compilation_unit() 
(cgraphunit.c:2621)

==24882==by 0xA5B3AB: compile_file() (toplev.c:492)

==24882==by 0x5F3A78: do_compile (toplev.c:1984)

==24882==by 0x5F3A78: toplev::main(int, char**) (toplev.c:2118)

==24882==by 0x5F5826: main (main.c:39)


Fixed in attached patch that can bootstrap on ppc64le-redhat-linux and survives 
regression tests.

Ready to be installed?
Martin

>From 290c88481c42ca4c09cf4c4202903fc84d0c9dc4 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Tue, 14 Feb 2017 09:45:41 +0100
Subject: [PATCH 1/2] Fix memory leak in tree-ssa-loop-unswitch.c

gcc/ChangeLog:

2017-02-14  Martin Liska  

	* tree-ssa-loop-unswitch.c (hoist_guard): Release get_loop_body
	vector.  Fix trailing white spaces.
---
 gcc/tree-ssa-loop-unswitch.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-ssa-loop-unswitch.c b/gcc/tree-ssa-loop-unswitch.c
index 143caf73e86..afa04e9d110 100644
--- a/gcc/tree-ssa-loop-unswitch.c
+++ b/gcc/tree-ssa-loop-unswitch.c
@@ -820,7 +820,7 @@ hoist_guard (struct loop *loop, edge guard)
   /* Create new loop pre-header.  */
   e = split_block (pre_header, last_stmt (pre_header));
   if (dump_file && (dump_flags & TDF_DETAILS))
-fprintf (dump_file, "  Moving guard %i->%i (prob %i) to bb %i, "	
+fprintf (dump_file, "  Moving guard %i->%i (prob %i) to bb %i, "
 	 "new preheader is %i\n",
 	 guard->src->index, guard->dest->index, guard->probability,
 	 e->src->index, e->dest->index);
@@ -879,7 +879,7 @@ hoist_guard (struct loop *loop, edge guard)
   /* ... finally scale everything in the loop except for guarded basic blocks
  where profile does not change.  */
   basic_block *body = get_loop_body (loop);
-  
+
   if (dump_file && (dump_flags & TDF_DETAILS))
 fprintf (dump_file, "  Scaling nonguarded BBs in loop:");
   for (unsigned int i = 0; i < loop->num_nodes; i++)
@@ -920,6 +920,8 @@ hoist_guard (struct loop *loop, edge guard)
 
   if (dump_file && (dump_flags & TDF_DETAILS))
 fprintf (dump_file, "\n  guard hoisted.\n");
+
+  free (body);
 }
 
 /* Return true if phi argument for exit edge can be used
-- 
2.11.0



Fix profile updating after outer loop unswitching

2017-02-05 Thread Jan Hubicka
Hi,
this patches updates profile after hoist_guard transformation that was added in
2015.  I wonder why this transofrm is bundled in tree-ssa-loop-unswitch and not
enabled at -O2/-Os.  It converts

 while (1)  
   {
 [header]]  
 loop_phi_nodes;
 something1;
 if (cond1) 
   body;
 nvar = phi(orig, bvar) ... for all variables changed in body;  
 [guard_end]
 something2;
 if (cond2) 
   break;   
 something3;
   }

to

   if (cond1)
 while (1)  
   {
 [header]]  
 loop_phi_nodes;
 something1;
 body;
 [guard_end]
 something2;
 if (cond2) 
   break;   
 something3;
   }

Which, unlike normal if conversion seems almost always win becuase it does not
duplicate any code. While path where loop executes 0 times has one extra
if (cond1) on it, this seems to be quite reasonable tradeoff.

Bootstrapped/regtested x86_64-linux, will commit it tomorrow unless there
are complains.

* gcc.dg/loop-unswitch-2.c: New testcase.
* gcc.dg/loop-unswitch-1.c: New testcase.

* tree-ssa-loop-unswitch.c (hoist_guard): Update profile.
Index: testsuite/gcc.dg/loop-unswitch-2.c
===
--- testsuite/gcc.dg/loop-unswitch-2.c  (revision 245196)
+++ testsuite/gcc.dg/loop-unswitch-2.c  (working copy)
@@ -12,4 +12,5 @@ void foo (float **a, float **b, float *c
 }
 
 /* { dg-final { scan-tree-dump-times "guard hoisted" 3 "unswitch" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "unswitch" } } */
 
Index: testsuite/gcc.dg/loop-unswitch-3.c
===
--- testsuite/gcc.dg/loop-unswitch-3.c  (revision 245196)
+++ testsuite/gcc.dg/loop-unswitch-3.c  (working copy)
@@ -22,5 +22,6 @@ float *foo(int ustride, int size, float
 }
 
 /* { dg-final { scan-tree-dump-times "guard hoisted" 1 "unswitch" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "unswitch" } } */
 
 
Index: tree-ssa-loop-unswitch.c
===
--- tree-ssa-loop-unswitch.c(revision 245196)
+++ tree-ssa-loop-unswitch.c(working copy)
@@ -787,6 +787,7 @@ hoist_guard (struct loop *loop, edge gua
   edge te, fe, e, new_edge;
   gimple *stmt;
   basic_block guard_bb = guard->src;
+  edge not_guard;
   gimple_stmt_iterator gsi;
   int flags = 0;
   bool fix_dom_of_exit;
@@ -818,18 +819,80 @@ hoist_guard (struct loop *loop, edge gua
   update_stmt (cond_stmt);
   /* Create new loop pre-header.  */
   e = split_block (pre_header, last_stmt (pre_header));
+  if (dump_file && (dump_flags & TDF_DETAILS))
+fprintf (dump_file, "  Moving guard %i->%i (prob %i) to bb %i, "   
+"new preheader is %i\n",
+guard->src->index, guard->dest->index, guard->probability,
+e->src->index, e->dest->index);
+
   gcc_assert (loop_preheader_edge (loop)->src == e->dest);
+
   if (guard == fe)
 {
   e->flags = EDGE_TRUE_VALUE;
   flags |= EDGE_FALSE_VALUE;
+  not_guard = te;
 }
   else
 {
   e->flags = EDGE_FALSE_VALUE;
   flags |= EDGE_TRUE_VALUE;
+  not_guard = fe;
 }
   new_edge = make_edge (pre_header, exit->dest, flags);
+
+  /* Determine the probability that we skip the loop.  Assume that loop has
+