[PATCH, 15/16] Add libgomp.oacc-c-c++-common/kernels-*.c

2015-11-09 Thread Tom de Vries

On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

  1  Insert new exit block only when needed in
     transform_to_exit_first_loop_alt
  2  Make create_parallel_loop return void
  3  Ignore reduction clause on kernels directive
  4  Implement -foffload-alias
  5  Add in_oacc_kernels_region in struct loop
  6  Add pass_oacc_kernels
  7  Add pass_dominator_oacc_kernels
  8  Add pass_ch_oacc_kernels
  9  Add pass_parallelize_loops_oacc_kernels
 10  Add pass_oacc_kernels pass group in passes.def
 11  Update testcases after adding kernels pass group
 12  Handle acc loop directive
 13  Add c-c++-common/goacc/kernels-*.c
 14  Add gfortran.dg/goacc/kernels-*.f95
 15  Add libgomp.oacc-c-c++-common/kernels-*.c
 16  Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.


This patch adds C/C++ oacc kernels execution tests.

Thanks,
- Tom

Add libgomp.oacc-c-c++-common/kernels-*.c

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c:
	Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c:
	Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c:
	Same.
	* testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c: Same.
---
 .../libgomp.oacc-c-c++-common/kernels-loop-2.c | 47 ++
 .../libgomp.oacc-c-c++-common/kernels-loop-3.c | 34 +
 .../kernels-loop-and-seq-2.c   | 36 ++
 .../kernels-loop-and-seq-3.c   | 37 ++
 .../kernels-loop-and-seq-4.c   | 36 ++
 .../kernels-loop-and-seq-5.c   | 37 ++
 .../kernels-loop-and-seq-6.c   | 36 ++
 .../kernels-loop-and-seq.c | 37 ++
 .../kernels-loop-collapse.c| 40 
 .../kernels-loop-data-2.c  | 56 ++
 .../kernels-loop-data-enter-exit-2.c   | 54 +
 .../kernels-loop-data-enter-exit.c | 51 
 .../kernels-loop-data-update.c | 53 
 .../libgomp.oacc-c-c++-common/kernels-loop-data.c  | 50 +++
 .../libgomp.oacc-c-c++-common/kernels-loop-g.c |  5 ++
 .../kernels-loop-mod-not-zero.c| 41 
 .../libgomp.oacc-c-c++-common/kernels-loop-n.c | 47 ++
 .../libgomp.oacc-c-c++-common/kernels-loop-nest.c  | 26 ++
 .../libgomp.oacc-c-c++-common/kernels-loop.c   | 41 
 .../kernels-parallel-loop-data-enter-exit.c| 52 
 .../libgomp.oacc-c-c++-common/kernels-reduction.c  | 37 ++
 21 files changed, 853 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/k

[PATCH, 12/16] Handle acc loop directive

2015-11-09 Thread Tom de Vries

On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

  1  Insert new exit block only when needed in
     transform_to_exit_first_loop_alt
  2  Make create_parallel_loop return void
  3  Ignore reduction clause on kernels directive
  4  Implement -foffload-alias
  5  Add in_oacc_kernels_region in struct loop
  6  Add pass_oacc_kernels
  7  Add pass_dominator_oacc_kernels
  8  Add pass_ch_oacc_kernels
  9  Add pass_parallelize_loops_oacc_kernels
 10  Add pass_oacc_kernels pass group in passes.def
 11  Update testcases after adding kernels pass group
 12  Handle acc loop directive
 13  Add c-c++-common/goacc/kernels-*.c
 14  Add gfortran.dg/goacc/kernels-*.f95
 15  Add libgomp.oacc-c-c++-common/kernels-*.c
 16  Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.


this patch deals with loops in an oacc kernels region which are 
annotated using "#pragma acc loop". It expands such a loop as a normal 
loop, which has the effect of ignoring the "#pragma acc loop".


Thanks,
- Tom

Handle acc loop directive

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (struct omp_region): Add inside_kernels_p field.
	(expand_omp_for_generic): Only set address taken for istart0
	and iend0 when necessary.  Adjust to generate a 'sequential' loop
	when GOMP builtin arguments are BUILT_IN_NONE.
	(expand_omp_for): Use expand_omp_for_generic() to generate a
	non-parallelized loop for OMP_FORs inside OpenACC kernels regions.
	(expand_omp): Mark inside_kernels_p field true for regions
	nested inside OpenACC kernels constructs.
---
 gcc/omp-low.c | 127 --
 1 file changed, 87 insertions(+), 40 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 1283cc7..859a2eb 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -136,6 +136,9 @@ struct omp_region
   /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
  a depend clause.  */
   gomp_ordered *ord_stmt;
+
+  /* True if this is nested inside an OpenACC kernels construct.  */
+  bool inside_kernels_p;
 };
 
 /* Context structure.  Used to store information about each parallel
@@ -8238,6 +8241,7 @@ expand_omp_for_generic (struct omp_region *region,
   gassign *assign_stmt;
   bool in_combined_parallel = is_combined_parallel (region);
   bool broken_loop = region->cont == NULL;
+  bool seq_loop = (start_fn == BUILT_IN_NONE || next_fn == BUILT_IN_NONE);
   edge e, ne;
   tree *counts = NULL;
   int i;
@@ -8335,8 +8339,12 @@ expand_omp_for_generic (struct omp_region *region,
   type = TREE_TYPE (fd->loop.v);
   istart0 = create_tmp_var (fd->iter_type, ".istart0");
   iend0 = create_tmp_var (fd->iter_type, ".iend0");
-  TREE_ADDRESSABLE (istart0) = 1;
-  TREE_ADDRESSABLE (iend0) = 1;
+
+if (!seq_loop)
+{
+  TREE_ADDRESSABLE (istart0) = 1;
+  TREE_ADDRESSABLE (iend0) = 1;
+}
 
   /* See if we need to bias by LLONG_MIN.  */
   if (fd->iter_type == long_long_unsigned_type_node
@@ -8366,7 +8374,20 @@ expand_omp_for_generic (struct omp_region *region,
   gsi_prev (&gsi);
 
   tree arr = NULL_TREE;
-  if (in_combined_parallel)
+  if (seq_loop)
+{
+  tree n1 = fold_convert (fd->iter_type, fd->loop.n1);
+  tree n2 = fold_convert (fd->iter_type, fd->loop.n2);
+
+  assign_stmt = gimple_build_assign (istart0, n1);
+  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+
+  assign_stmt = gimple_build_assign (iend0, n2);
+  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+
+  t = fold_build2 (NE_EXPR, boolean_type_node, istart0, iend0);
+}
+  else if (in_combined_parallel)
 {
   gcc_assert (fd->ordered == 0);
   /* In a combined parallel loop, emit a call to
@@ -8788,39 +8809,45 @@ expand_omp_for_generic (struct omp_region *region,
 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
 
   /* Emit code to get the next parallel iteration in L2_BB.  */
-  gsi = gsi_start_bb (l2_bb);
+  if (!seq_loop)
+	{
+	  gsi = gsi_start_bb (l2_bb);
 
-  t = build_call_expr (builtin_decl_explicit (next_fn), 2,
-			   build_fold_addr_expr (istart0),
-			   build_fold_addr_expr (iend0));
-  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
-false, GSI_CONTINUE_LINKING);
-  if (TREE_

[PATCH, 10/16] Add pass_oacc_kernels pass group in passes.def

2015-11-09 Thread Tom de Vries

On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

  1  Insert new exit block only when needed in
     transform_to_exit_first_loop_alt
  2  Make create_parallel_loop return void
  3  Ignore reduction clause on kernels directive
  4  Implement -foffload-alias
  5  Add in_oacc_kernels_region in struct loop
  6  Add pass_oacc_kernels
  7  Add pass_dominator_oacc_kernels
  8  Add pass_ch_oacc_kernels
  9  Add pass_parallelize_loops_oacc_kernels
 10  Add pass_oacc_kernels pass group in passes.def
 11  Update testcases after adding kernels pass group
 12  Handle acc loop directive
 13  Add c-c++-common/goacc/kernels-*.c
 14  Add gfortran.dg/goacc/kernels-*.f95
 15  Add libgomp.oacc-c-c++-common/kernels-*.c
 16  Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.



This patch adds the pass_oacc_kernels pass group to the pass list in 
passes.def.


Note the repetition of pass_lim/pass_copy_prop. The first pair is for an 
inner loop in a loop nest, the second for an outer loop in a loop nest.


Thanks,
- Tom

Add pass_oacc_kernels pass group in passes.def

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (pass_expand_omp_ssa::clone): New function.
	* tree-ssa-loop.c (pass_scev_cprop::clone, pass_tree_loop_init::clone)
	(pass_tree_loop_done::clone): New function.
	* passes.def: Add pass_oacc_kernels pass group.
---
 gcc/omp-low.c   |  1 +
 gcc/passes.def  | 21 +
 gcc/tree-ssa-loop.c |  3 +++
 3 files changed, 25 insertions(+)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 13fa456..1283cc7 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -13360,6 +13360,7 @@ public:
   return !(fun->curr_properties & PROP_gimple_eomp);
 }
   virtual unsigned int execute (function *) { return execute_expand_omp (); }
+  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
 
 }; // class pass_expand_omp_ssa
 
diff --git a/gcc/passes.def b/gcc/passes.def
index c0ab6b9..b7a5424 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -86,6 +86,27 @@ along with GCC; see the file COPYING3.  If not see
 	  /* pass_build_ealias is a dummy pass that ensures that we
 	 execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
+	  /* Pass group that runs when there are oacc kernels in the
+	 function.  */
+	  NEXT_PASS (pass_oacc_kernels);
+	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
+	  NEXT_PASS (pass_dominator_oacc_kernels);
+	  NEXT_PASS (pass_ch_oacc_kernels);
+	  NEXT_PASS (pass_dominator_oacc_kernels);
+	  NEXT_PASS (pass_tree_loop_init);
+	  NEXT_PASS (pass_lim);
+	  NEXT_PASS (pass_copy_prop);
+	  NEXT_PASS (pass_lim);
+	  NEXT_PASS (pass_copy_prop);
+	  NEXT_PASS (pass_scev_cprop);
+	  NEXT_PASS (pass_tree_loop_done);
+	  NEXT_PASS (pass_dominator_oacc_kernels);
+	  NEXT_PASS (pass_dce);
+	  NEXT_PASS (pass_tree_loop_init);
+	  NEXT_PASS (pass_parallelize_loops_oacc_kernels);
+	  NEXT_PASS (pass_expand_omp_ssa);
+	  NEXT_PASS (pass_tree_loop_done);
+	  POP_INSERT_PASSES ()
 	  NEXT_PASS (pass_fre);
 	  NEXT_PASS (pass_merge_phi);
   NEXT_PASS (pass_dse);
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index b51cac2..0557f99 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -270,6 +270,7 @@ public:
 
   /* opt_pass methods: */
   virtual unsigned int execute (function *);
+  opt_pass * clone () { return new pass_tree_loop_init (m_ctxt); }
 
 }; // class pass_tree_loop_init
 
@@ -374,6 +375,7 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *) { return flag_tree_scev_cprop; }
   virtual unsigned int execute (function *) { return scev_const_prop (); }
+  opt_pass * clone () { return new pass_scev_cprop (m_ctxt); }
 
 }; // class pass_scev_cprop
 
@@ -516,6 +518,7 @@ public:
 
   /* opt_pass methods: */
   virtual unsigned int execute (function *) { return tree_ssa_loop_done (); }
+  opt_pass * clone () { return new pass_tree_loop_done (m_ctxt); }
 
 }; // class pass_tree_loop_done
 
-- 
1.9.1



[PATCH, 13/16] Add c-c++-common/goacc/kernels-*.c

2015-11-09 Thread Tom de Vries

On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

  1  Insert new exit block only when needed in
     transform_to_exit_first_loop_alt
  2  Make create_parallel_loop return void
  3  Ignore reduction clause on kernels directive
  4  Implement -foffload-alias
  5  Add in_oacc_kernels_region in struct loop
  6  Add pass_oacc_kernels
  7  Add pass_dominator_oacc_kernels
  8  Add pass_ch_oacc_kernels
  9  Add pass_parallelize_loops_oacc_kernels
 10  Add pass_oacc_kernels pass group in passes.def
 11  Update testcases after adding kernels pass group
 12  Handle acc loop directive
 13  Add c-c++-common/goacc/kernels-*.c
 14  Add gfortran.dg/goacc/kernels-*.f95
 15  Add libgomp.oacc-c-c++-common/kernels-*.c
 16  Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.


This patch adds C/C++ oacc kernels compilation tests.

Thanks,
- Tom

Add c-c++-common/goacc/kernels-*.c

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* c-c++-common/goacc/kernels-acc-loop-reduction.c: New test.
	* c-c++-common/goacc/kernels-acc-loop-smaller-equal.c: New test.
	* c-c++-common/goacc/kernels-counter-var-redundant-load.c: New test.
	* c-c++-common/goacc/kernels-counter-vars-function-scope.c: New test.
	* c-c++-common/goacc/kernels-double-reduction.c: New test.
	* c-c++-common/goacc/kernels-empty.c: New test.
	* c-c++-common/goacc/kernels-eternal.c: New test.
	* c-c++-common/goacc/kernels-loop-2-acc-loop.c: New test.
	* c-c++-common/goacc/kernels-loop-2.c: New test.
	* c-c++-common/goacc/kernels-loop-3-acc-loop.c: New test.
	* c-c++-common/goacc/kernels-loop-3.c: New test.
	* c-c++-common/goacc/kernels-loop-acc-loop.c: New test.
	* c-c++-common/goacc/kernels-loop-data-2.c: New test.
	* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: New test.
	* c-c++-common/goacc/kernels-loop-data-enter-exit.c: New test.
	* c-c++-common/goacc/kernels-loop-data-update.c: New test.
	* c-c++-common/goacc/kernels-loop-data.c: New test.
	* c-c++-common/goacc/kernels-loop-g.c: New test.
	* c-c++-common/goacc/kernels-loop-mod-not-zero.c: New test.
	* c-c++-common/goacc/kernels-loop-n-acc-loop.c: New test.
	* c-c++-common/goacc/kernels-loop-n.c: New test.
	* c-c++-common/goacc/kernels-loop-nest.c: New test.
	* c-c++-common/goacc/kernels-loop.c: New test.
	* c-c++-common/goacc/kernels-noreturn.c: New test.
	* c-c++-common/goacc/kernels-one-counter-var.c: New test.
	* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: New test.
	* c-c++-common/goacc/kernels-reduction.c: New test.
---
 .../goacc/kernels-acc-loop-reduction.c | 25 
 .../goacc/kernels-acc-loop-smaller-equal.c | 25 
 .../goacc/kernels-counter-var-redundant-load.c | 36 +++
 .../goacc/kernels-counter-vars-function-scope.c| 54 +
 .../c-c++-common/goacc/kernels-double-reduction.c  | 37 
 gcc/testsuite/c-c++-common/goacc/kernels-empty.c   |  6 ++
 gcc/testsuite/c-c++-common/goacc/kernels-eternal.c | 11 
 .../c-c++-common/goacc/kernels-loop-2-acc-loop.c   | 21 +++
 gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c  | 70 ++
 .../c-c++-common/goacc/kernels-loop-3-acc-loop.c   | 17 ++
 gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c  | 49 +++
 .../c-c++-common/goacc/kernels-loop-acc-loop.c | 17 ++
 .../c-c++-common/goacc/kernels-loop-data-2.c   | 70 ++
 .../goacc/kernels-loop-data-enter-exit-2.c | 68 +
 .../goacc/kernels-loop-data-enter-exit.c   | 65 
 .../c-c++-common/goacc/kernels-loop-data-update.c  | 65 
 .../c-c++-common/goacc/kernels-loop-data.c | 64 
 gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c  | 17 ++
 .../c-c++-common/goacc/kernels-loop-mod-not-zero.c | 52 
 .../c-c++-common/goacc/kernels-loop-n-acc-loop.c   | 17 ++
 gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c  | 56 +
 .../c-c++-common/goacc/kernels-loop-nest.c | 39 
 gcc/testsuite/c-c++-common/goacc/kernels-loop.c| 56 +
 .../c-c++-common/goacc/kernels-noreturn.c  | 12 
 .../c-c++-common/goacc/kernels-one-counter-var.c   | 54 +
 .../goacc/kernels-parallel-loop-data-enter-exit.c

[committed] gen-pass-instances.awk: Add emacs indent setting

2015-11-12 Thread Tom de Vries

Hi,

this patch adds emacs indentation settings to gen-pass-instances.awk. 
The default indentation width in emacs awk mode seems to be 4, and this 
setting overrides it to 8, which is the style used in this file.


Committed to trunk as trivial.

Thanks,
- Tom
gen-pass-instances.awk: Add emacs indent setting

2015-11-11  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk: Add emacs indent setting.

---
 gcc/gen-pass-instances.awk | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index f36f510..a0be6a1 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -64,3 +64,8 @@ function handle_line()
 }
 
 { handle_line() }
+
+# Local Variables:
+# mode:awk
+# c-basic-offset:8
+# End:


[committed] gen-pass-instances.awk: Use early-out in handle_line

2015-11-12 Thread Tom de Vries

Hi,

this patch restructures handle_line in gen-pass-instances.awk to use an 
early-out.


Committed to trunk as trivial.

Thanks,
- Tom
gen-pass-instances.awk: Use early-out in handle_line

2015-11-11  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk (handle_line): Restructure using early-out.

---
 gcc/gen-pass-instances.awk | 32 +---
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index 9eaac65..27e7a98 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -41,25 +41,27 @@ BEGIN {
 function handle_line()
 {
 	line = $0;
+
 	where = match(line, /NEXT_PASS \((.+)\)/);
-	if (where != 0)
+	if (where == 0)
 	{
-		len_of_start = length("NEXT_PASS (");
-		len_of_end = length(")");
-		len_of_pass_name = RLENGTH - (len_of_start + len_of_end);
-		pass_starts_at = where + len_of_start;
-		pass_name = substr(line, pass_starts_at, len_of_pass_name);
-		if (pass_name in pass_counts)
-			pass_counts[pass_name]++;
-		else
-			pass_counts[pass_name] = 1;
-		printf "%s, %s%s\n",
-			substr(line, 1, pass_starts_at + len_of_pass_name - 1),
-			pass_counts[pass_name],
-			substr(line, pass_starts_at + len_of_pass_name);
-	} else {
 		print line;
+		return;
 	}
+
+	len_of_start = length("NEXT_PASS (");
+	len_of_end = length(")");
+	len_of_pass_name = RLENGTH - (len_of_start + len_of_end);
+	pass_starts_at = where + len_of_start;
+	pass_name = substr(line, pass_starts_at, len_of_pass_name);
+	if (pass_name in pass_counts)
+		pass_counts[pass_name]++;
+	else
+		pass_counts[pass_name] = 1;
+	printf "%s, %s%s\n",
+		substr(line, 1, pass_starts_at + len_of_pass_name - 1),
+		pass_counts[pass_name],
+		substr(line, pass_starts_at + len_of_pass_name);
 }
 
 { handle_line() }


[committed] gen-pass-instances.awk: Add len_of_call var in handle_line

2015-11-12 Thread Tom de Vries

Hi,

this patch adds a variable len_of_call in handle_line in 
gen-pass-instances.awk.  It moves the use of the RLENGTH variable just 
after the related match call.


Committed to trunk as trivial.

Thanks,
- Tom
gen-pass-instances.awk: Add len_of_call var in handle_line

2015-11-11  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk (handle_line): Add len_of_call variable.

---
 gcc/gen-pass-instances.awk | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index 27e7a98..70b00b7 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -42,6 +42,7 @@ function handle_line()
 {
 	line = $0;
 
+	# Find call expression.
 	where = match(line, /NEXT_PASS \((.+)\)/);
 	if (where == 0)
 	{
@@ -49,9 +50,12 @@ function handle_line()
 		return;
 	}
 
+	# Length of the call expression.
+	len_of_call = RLENGTH;
+
 	len_of_start = length("NEXT_PASS (");
 	len_of_end = length(")");
-	len_of_pass_name = RLENGTH - (len_of_start + len_of_end);
+	len_of_pass_name = len_of_call - (len_of_start + len_of_end);
 	pass_starts_at = where + len_of_start;
 	pass_name = substr(line, pass_starts_at, len_of_pass_name);
 	if (pass_name in pass_counts)


[committed] gen-pass-instances.awk: Add pass_num, prefix and postfix vars in handle_line

2015-11-12 Thread Tom de Vries

Hi,

this patch adds new variables pass_num, prefix and postfix in 
handle_line in gen-pass-instances.awk.


Committed to trunk as trivial.

Thanks,
- Tom
gen-pass-instances.awk: Add pass_num, prefix and postfix vars in handle_line

2015-11-11  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk (handle_line): Add pass_num, prefix and postfix
	vars.

---
 gcc/gen-pass-instances.awk | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index 3d5e8b6..1aced74 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -61,17 +61,22 @@ function handle_line()
 	pass_starts_at = where + len_of_start;
 	pass_name = substr(line, pass_starts_at, len_of_pass_name);
 
+	# Find prefix (until and including pass_name)
+	prefix = substr(line, 1, pass_starts_at + len_of_pass_name - 1)
+
+	# Find postfix (after pass_name)
+	postfix = substr(line, pass_starts_at + len_of_pass_name)
+
 	# Set pass_counts
 	if (pass_name in pass_counts)
 		pass_counts[pass_name]++;
 	else
 		pass_counts[pass_name] = 1;
 
+	pass_num = pass_counts[pass_name];
+
 	# Print call expression with extra pass_num argument
-	printf "%s, %s%s\n",
-		substr(line, 1, pass_starts_at + len_of_pass_name - 1),
-		pass_counts[pass_name],
-		substr(line, pass_starts_at + len_of_pass_name);
+	printf "%s, %s%s\n", prefix, pass_num, postfix;
 }
 
 { handle_line() }


[committed] gen-pass-instances.awk: Rename var where to call_starts_at in handle_line

2015-11-12 Thread Tom de Vries

Hi,

this patch renames the rather generic variable 'where' to the more 
specific 'call_starts_at' in handle_line in gen-pass-instances.awk.


Committed to trunk as trivial.

Thanks,
- Tom
gen-pass-instances.awk: Rename var where to call_starts_at in handle_line

2015-11-12  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk (handle_line): Rename var where to
	call_starts_at.

---
 gcc/gen-pass-instances.awk | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index b10c26a..311273e 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -43,8 +43,8 @@ function handle_line()
 	line = $0;
 
 	# Find call expression.
-	where = match(line, /NEXT_PASS \((.+)\)/);
-	if (where == 0)
+	call_starts_at = match(line, /NEXT_PASS \((.+)\)/);
+	if (call_starts_at == 0)
 	{
 		print line;
 		return;
@@ -59,7 +59,7 @@ function handle_line()
 
 	# Find pass_name argument
 	len_of_pass_name = len_of_call - (len_of_start + len_of_close);
-	pass_starts_at = where + len_of_start;
+	pass_starts_at = call_starts_at + len_of_start;
 	pass_name = substr(line, pass_starts_at, len_of_pass_name);
 
 	# Find call expression prefix (until and including called function)


[committed] gen-pass-instances.awk: Rename len_of_end to len_of_close in handle_line

2015-11-12 Thread Tom de Vries

Hi,

this patch renames variable len_of_end to len_of_close in handle_line in 
gen-pass-instances.awk.


Committed to trunk as obvious.

Thanks,
- Tom
gen-pass-instances.awk: Rename len_of_end to len_of_close in handle_line

2015-11-11  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk (handle_line): Rename len_of_end to
	len_of_close.

---
 gcc/gen-pass-instances.awk | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index 70b00b7..7624959 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -54,8 +54,9 @@ function handle_line()
 	len_of_call = RLENGTH;
 
 	len_of_start = length("NEXT_PASS (");
-	len_of_end = length(")");
-	len_of_pass_name = len_of_call - (len_of_start + len_of_end);
+	len_of_close = length(")");
+
+	len_of_pass_name = len_of_call - (len_of_start + len_of_close);
 	pass_starts_at = where + len_of_start;
 	pass_name = substr(line, pass_starts_at, len_of_pass_name);
 	if (pass_name in pass_counts)


[committed] gen-pass-instances.awk: Simplify init of postfix_starts_at in handle_line

2015-11-12 Thread Tom de Vries

Hi,

this patch simplifies the initialization of postfix_starts_at in 
handle_line in gen-pass-instances.awk.


Committed to trunk as trivial.

Thanks,
- Tom
gen-pass-instances.awk: Simplify init of postfix_starts_at in handle_line

2015-11-12  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk (handle_line): Simplify init of
	postfix_starts_at.

---
 gcc/gen-pass-instances.awk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index 311273e..08d4a37 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -67,7 +67,7 @@ function handle_line()
 	prefix = substr(line, 1, prefix_len);
 
 	# Find call expression postfix
-	postfix_starts_at = pass_starts_at + len_of_pass_name + len_of_close;
+	postfix_starts_at = call_starts_at + len_of_call;
 	postfix = substr(line, postfix_starts_at);
 
 	# Set pass_counts


[committed] gen-pass-instances.awk: Unify semicolon use in handle_line

2015-11-12 Thread Tom de Vries

Hi,

this patch unifies semicolon use in handle_line in gen-pass-instances.awk.

Committed to trunk as trivial.

Thanks,
- Tom
gen-pass-instances.awk: Unify semicolon use in handle_line

2015-11-11  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk (handle_line): Unify semicolon use.

---
 gcc/gen-pass-instances.awk | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index 7f33e8c..9eaac65 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -41,14 +41,14 @@ BEGIN {
 function handle_line()
 {
 	line = $0;
-	where = match(line, /NEXT_PASS \((.+)\)/)
+	where = match(line, /NEXT_PASS \((.+)\)/);
 	if (where != 0)
 	{
-		len_of_start = length("NEXT_PASS (")
-		len_of_end = length(")")
-		len_of_pass_name = RLENGTH - (len_of_start + len_of_end)
-		pass_starts_at = where + len_of_start
-		pass_name = substr(line, pass_starts_at, len_of_pass_name)
+		len_of_start = length("NEXT_PASS (");
+		len_of_end = length(")");
+		len_of_pass_name = RLENGTH - (len_of_start + len_of_end);
+		pass_starts_at = where + len_of_start;
+		pass_name = substr(line, pass_starts_at, len_of_pass_name);
 		if (pass_name in pass_counts)
 			pass_counts[pass_name]++;
 		else


gen-pass-instances.awk: Remove unused var in handle_line

2015-11-12 Thread Tom de Vries

Hi,

this patch removes an unused variable from handle_line in 
gen-pass-instances.awk.


Committed to trunk as trivial.

Thanks,
- Tom
gen-pass-instances.awk: Remove unused var in handle_line

2015-11-11  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk (handle_line): Remove unused var line_length.

---
 gcc/gen-pass-instances.awk | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index a0be6a1..7f33e8c 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -47,7 +47,6 @@ function handle_line()
 		len_of_start = length("NEXT_PASS (")
 		len_of_end = length(")")
 		len_of_pass_name = RLENGTH - (len_of_start + len_of_end)
-		line_length = length(line)
 		pass_starts_at = where + len_of_start
 		pass_name = substr(line, pass_starts_at, len_of_pass_name)
 		if (pass_name in pass_counts)


[committed] gen-pass-instances.awk: Simplify match regexp in handle_line

2015-11-12 Thread Tom de Vries

Hi,

this patch simplifies the match regexp in handle_line in 
gen-pass-instances.awk.


Committed to trunk as trivial.

Thanks,
- Tom
gen-pass-instances.awk: Simplify match regexp in handle_line

2015-11-12  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk (handle_line): Simplify match regexp.

---
 gcc/gen-pass-instances.awk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index 08d4a37..cbfaa86 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -43,7 +43,7 @@ function handle_line()
 	line = $0;
 
 	# Find call expression.
-	call_starts_at = match(line, /NEXT_PASS \((.+)\)/);
+	call_starts_at = match(line, /NEXT_PASS \(.+\)/);
 	if (call_starts_at == 0)
 	{
 		print line;


[committed] gen-pass-instances.awk: Add comments in handle_line

2015-11-12 Thread Tom de Vries

Hi,

this patch adds some comments in handle_line in gen-pass-instances.awk.

Committed to trunk as trivial.

Thanks,
- Tom
gen-pass-instances.awk: Add comments in handle_line

2015-11-11  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk (handle_line): Add comments.

---
 gcc/gen-pass-instances.awk | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index 7624959..3d5e8b6 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -56,13 +56,18 @@ function handle_line()
 	len_of_start = length("NEXT_PASS (");
 	len_of_close = length(")");
 
+	# Find pass_name argument
 	len_of_pass_name = len_of_call - (len_of_start + len_of_close);
 	pass_starts_at = where + len_of_start;
 	pass_name = substr(line, pass_starts_at, len_of_pass_name);
+
+	# Set pass_counts
 	if (pass_name in pass_counts)
 		pass_counts[pass_name]++;
 	else
 		pass_counts[pass_name] = 1;
+
+	# Print call expression with extra pass_num argument
 	printf "%s, %s%s\n",
 		substr(line, 1, pass_starts_at + len_of_pass_name - 1),
 		pass_counts[pass_name],


[committed] gen-pass-instances.awk: Make print command clearer in handle_line

2015-11-12 Thread Tom de Vries

Hi,

this patch modifies the prefix and postfix expressions in handle_line in
gen-pass-instances.awk, such that the printf command now lists all the
NEXT_PASS call arguments, and surrounds them with parentheses.


Committed to trunk as trivial.

Thanks,
- Tom
gen-pass-instances.awk: Make print command clearer in handle_line

2015-11-11  Tom de Vries  <t...@codesourcery.com>

	* gen-pass-instances.awk (handle_line): Print parentheses and pass_name
	explicitly.

---
 gcc/gen-pass-instances.awk | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index 1aced74..b10c26a 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -54,6 +54,7 @@ function handle_line()
 	len_of_call = RLENGTH;
 
 	len_of_start = length("NEXT_PASS (");
+	len_of_open = length("(");
 	len_of_close = length(")");
 
 	# Find pass_name argument
@@ -61,11 +62,13 @@ function handle_line()
 	pass_starts_at = where + len_of_start;
 	pass_name = substr(line, pass_starts_at, len_of_pass_name);
 
-	# Find prefix (until and including pass_name)
-	prefix = substr(line, 1, pass_starts_at + len_of_pass_name - 1)
+	# Find call expression prefix (until and including called function)
+	prefix_len = pass_starts_at - 1 - len_of_open;
+	prefix = substr(line, 1, prefix_len);
 
-	# Find postfix (after pass_name)
-	postfix = substr(line, pass_starts_at + len_of_pass_name)
+	# Find call expression postfix
+	postfix_starts_at = pass_starts_at + len_of_pass_name + len_of_close;
+	postfix = substr(line, postfix_starts_at);
 
 	# Set pass_counts
 	if (pass_name in pass_counts)
@@ -76,7 +79,7 @@ function handle_line()
 	pass_num = pass_counts[pass_name];
 
 	# Print call expression with extra pass_num argument
-	printf "%s, %s%s\n", prefix, pass_num, postfix;
+	printf "%s(%s, %s)%s\n", prefix, pass_name, pass_num, postfix;
 }
 
 { handle_line() }


[RFC] Remove first_pass_instance from pass_vrp

2015-11-12 Thread Tom de Vries

Hi,

[ See also related discussion at 
https://gcc.gnu.org/ml/gcc-patches/2012-07/msg00452.html ]


this patch removes the usage of first_pass_instance from pass_vrp.

the patch:
- limits itself to pass_vrp, but my intention is to remove all
  usage of first_pass_instance
- lacks an update to gdbhooks.py

Modifying the pass behaviour depending on the instance number, as
first_pass_instance does, breaks compositionality of the pass list. In
other words, adding a pass instance in a pass list may change the 
behaviour of another instance of that pass in the pass list. Which 
obviously makes it harder to understand and change the pass list. [ I've 
filed this issue as PR68247 - Remove pass_first_instance ]


The solution is to make the difference in behaviour explicit in the pass 
list, and no longer change behaviour depending on instance number.


One obvious possible fix is to create a duplicate pass with a different 
name, say 'pass_vrp_warn_array_bounds':

...
  NEXT_PASS (pass_vrp_warn_array_bounds);
  ...
  NEXT_PASS (pass_vrp);
...

But, AFAIU that requires us to choose a different dump-file name for 
each pass. And choosing vrp1 and vrp2 as new dump-file names still means 
that -fdump-tree-vrp no longer works (which was mentioned as drawback 
here: https://gcc.gnu.org/ml/gcc-patches/2012-07/msg00453.html ).


This patch instead makes pass creation parameterizable. So in the pass 
list, we use:

...
  NEXT_PASS_WITH_ARG (pass_vrp, true /* warn_array_bounds_p */);
  ...
  NEXT_PASS_WITH_ARG (pass_vrp, false /* warn_array_bounds_p */);
...

This approach gives us clarity in the pass list, similar to using a 
duplicate pass 'pass_vrp_warn_array_bounds'.


But it also means -fdump-tree-vrp still works as before.

Good idea? Other comments?

Thanks,
- Tom
Remove first_pass_instance from pass_vrp

---
 gcc/gen-pass-instances.awk | 32 ++--
 gcc/pass_manager.h |  2 ++
 gcc/passes.c   | 20 
 gcc/passes.def |  4 ++--
 gcc/tree-pass.h|  3 ++-
 gcc/tree-vrp.c | 22 --
 6 files changed, 60 insertions(+), 23 deletions(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index cbfaa86..c77bd64 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -43,7 +43,7 @@ function handle_line()
 	line = $0;
 
 	# Find call expression.
-	call_starts_at = match(line, /NEXT_PASS \(.+\)/);
+	call_starts_at = match(line, /NEXT_PASS(_WITH_ARG)? \(.+\)/);
 	if (call_starts_at == 0)
 	{
 		print line;
@@ -53,23 +53,28 @@ function handle_line()
 	# Length of the call expression.
 	len_of_call = RLENGTH;
 
-	len_of_start = length("NEXT_PASS (");
 	len_of_open = length("(");
 	len_of_close = length(")");
 
-	# Find pass_name argument
-	len_of_pass_name = len_of_call - (len_of_start + len_of_close);
-	pass_starts_at = call_starts_at + len_of_start;
-	pass_name = substr(line, pass_starts_at, len_of_pass_name);
-
 	# Find call expression prefix (until and including called function)
-	prefix_len = pass_starts_at - 1 - len_of_open;
-	prefix = substr(line, 1, prefix_len);
+	match(line, /NEXT_PASS(_WITH_ARG)? /)
+	len_of_call_name = RLENGTH
+	prefix_len = call_starts_at + len_of_call_name - 1
+	prefix = substr(line, 1, prefix_len)
 
 	# Find call expression postfix
 	postfix_starts_at = call_starts_at + len_of_call;
 	postfix = substr(line, postfix_starts_at);
 
+	args_starts_at = prefix_len + 1 + len_of_open;
+	len_of_args = postfix_starts_at - args_starts_at - len_of_close;
+	args_str = substr(line, args_starts_at, len_of_args);
+	split(args_str, args, ",");
+
+	# Find pass_name argument, an optional with_arg argument
+	pass_name = args[1];
+	with_arg = args[2];
+
 	# Set pass_counts
 	if (pass_name in pass_counts)
 		pass_counts[pass_name]++;
@@ -79,7 +84,14 @@ function handle_line()
 	pass_num = pass_counts[pass_name];
 
 	# Print call expression with extra pass_num argument
-	printf "%s(%s, %s)%s\n", prefix, pass_name, pass_num, postfix;
+	printf "%s(", prefix;
+	printf "%s", pass_name;
+	printf ", %s", pass_num;
+	if (with_arg)
+	{
+		printf ", %s", with_arg;
+	}
+	printf ")%s\n", postfix;
 }
 
 { handle_line() }
diff --git a/gcc/pass_manager.h b/gcc/pass_manager.h
index 7d539e4..a8199e2 100644
--- a/gcc/pass_manager.h
+++ b/gcc/pass_manager.h
@@ -120,6 +120,7 @@ private:
 #define PUSH_INSERT_PASSES_WITHIN(PASS)
 #define POP_INSERT_PASSES()
 #define NEXT_PASS(PASS, NUM) opt_pass *PASS ## _ ## NUM
+#define NEXT_PASS_WITH_ARG(PASS, NUM, ARG) NEXT_PASS (PASS, NUM)
 #define TERMINATE_PASS_LIST()
 
 #include "pass-instances.def"
@@ -128,6 +129,7 @@ private:
 #undef PUSH_INSERT_PASSES_WITHIN
 #undef POP_INSERT_PASSES
 #undef NEXT_PASS
+#undef NEXT_PASS_WITH_ARG
 #undef TERMINATE_PASS_LIST
 
 }; // class pass_manager
diff --git a/gcc/passes.c b/gcc/passes.c
index dd8d00a..0fd365e 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -81,6 +81,12 @@ opt_pass::clone ()
   internal_error 

Re: [RFC] Remove first_pass_instance from pass_vrp

2015-11-12 Thread Tom de Vries

On 12/11/15 13:26, Richard Biener wrote:

On Thu, Nov 12, 2015 at 12:37 PM, Tom de Vries <tom_devr...@mentor.com> wrote:

Hi,

[ See also related discussion at
https://gcc.gnu.org/ml/gcc-patches/2012-07/msg00452.html ]

this patch removes the usage of first_pass_instance from pass_vrp.

the patch:
- limits itself to pass_vrp, but my intention is to remove all
   usage of first_pass_instance
- lacks an update to gdbhooks.py

Modifying the pass behaviour depending on the instance number, as
first_pass_instance does, breaks compositionality of the pass list. In other
words, adding a pass instance in a pass list may change the behaviour of
another instance of that pass in the pass list. Which obviously makes it
harder to understand and change the pass list. [ I've filed this issue as
PR68247 - Remove pass_first_instance ]

The solution is to make the difference in behaviour explicit in the pass
list, and no longer change behaviour depending on instance number.

One obvious possible fix is to create a duplicate pass with a different
name, say 'pass_vrp_warn_array_bounds':
...
   NEXT_PASS (pass_vrp_warn_array_bounds);
   ...
   NEXT_PASS (pass_vrp);
...

But, AFAIU that requires us to choose a different dump-file name for each
pass. And choosing vrp1 and vrp2 as new dump-file names still means that
-fdump-tree-vrp no longer works (which was mentioned as drawback here:
https://gcc.gnu.org/ml/gcc-patches/2012-07/msg00453.html ).

This patch instead makes pass creation parameterizable. So in the pass list,
we use:
...
   NEXT_PASS_WITH_ARG (pass_vrp, true /* warn_array_bounds_p */);
   ...
   NEXT_PASS_WITH_ARG (pass_vrp, false /* warn_array_bounds_p */);
...

This approach gives us clarity in the pass list, similar to using a
duplicate pass 'pass_vrp_warn_array_bounds'.

But it also means -fdump-tree-vrp still works as before.

Good idea? Other comments?


It's good to get rid of the first_pass_instance hack.

I can't comment on the AWK, leaving that to others.  Syntax-wise I'd hoped
we can just use NEXT_PASS with the extra argument being optional...


I suppose I could use NEXT_PASS in the pass list, and expand into 
NEXT_PASS_WITH_ARG in pass-instances.def.


An alternative would be to change the NEXT_PASS macro definitions into 
vararg variants. But the last time I submitted something with a vararg 
macro ( https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00794.html ), I 
got a question about it ( 
https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00912.html ), so I tend to 
avoid using vararg macros.



I don't see the need for giving clone_with_args a new name, just use an overload
of clone ()?


That's what I tried initially, but I ran into:
...
src/gcc/tree-pass.h:85:21: warning: ‘virtual opt_pass* 
opt_pass::clone()’ was hidden [-Woverloaded-virtual]

   virtual opt_pass *clone ();
 ^
src/gcc/tree-vrp.c:10393:14: warning:   by ‘virtual opt_pass* 
{anonymous}::pass_vrp::clone(bool)’ [-Woverloaded-virtual]
   opt_pass * clone (bool warn_array_bounds_p) { return new pass_vrp 
(m_ctxt, warn_array_bounds_p); }

...

Googling the error message gives this discussion: ( 
http://stackoverflow.com/questions/16505092/confused-about-virtual-overloaded-functions 
), and indeed adding

  "using gimple_opt_pass::clone;"
in class pass_vrp makes the warning disappear.

I'll submit an updated version.

Thanks,
- Tom

> [ideally C++ would allow us to say that only one overload may be
> implemented]


Re: [PATCH, 11/16] Update testcases after adding kernels pass group

2015-11-12 Thread Tom de Vries

On 11/11/15 12:03, Richard Biener wrote:

On Mon, 9 Nov 2015, Tom de Vries wrote:


On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

   1Insert new exit block only when needed in
  transform_to_exit_first_loop_alt
   2Make create_parallel_loop return void
   3Ignore reduction clause on kernels directive
   4Implement -foffload-alias
   5Add in_oacc_kernels_region in struct loop
   6Add pass_oacc_kernels
   7Add pass_dominator_oacc_kernels
   8Add pass_ch_oacc_kernels
   9Add pass_parallelize_loops_oacc_kernels
  10Add pass_oacc_kernels pass group in passes.def
  11Update testcases after adding kernels pass group
  12Handle acc loop directive
  13Add c-c++-common/goacc/kernels-*.c
  14Add gfortran.dg/goacc/kernels-*.f95
  15Add libgomp.oacc-c-c++-common/kernels-*.c
  16Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Build and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.


This patch updates existing testcases with new pass numbers, given the passes
that were added in the pass list in patch 10.


I think it would be nice to be able to specify the number in the .def
file instead so we can avoid this kind of churn every time we do this.


How about something along the lines of:
...
  /* pass_build_ealias is a dummy pass that ensures that we
 execute TODO_rebuild_alias at this point.  */
  NEXT_PASS (pass_build_ealias);
  /* Pass group that runs when there are oacc kernels in the
  function.  */
  NEXT_PASS (pass_oacc_kernels);
  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
  PUSH_ID ("oacc_kernels")
...
  POP_ID ()
  POP_INSERT_PASSES ()
  NEXT_PASS (pass_fre);
...

where the PUSH_ID/POP_ID pair has the functionality that all the 
contained passes:

- have the id prefixed to the dump file, so the dump file of pass_ch
  which normally is "ch" becomes "oacc_kernels_ch", and
- the pass name in pass_instances.def becomes pass_oacc_kernels_ch, such
  that it doesn't count as numbered instance of pass_ch
?

Thanks,
- Tom


Re: [PATCH] Preserve the original program while using graphite

2015-11-13 Thread Tom de Vries

On 11/11/15 23:54, Aditya Kumar wrote:

Earlier, graphite used to translate portions of the original program after
scop-detection in order to represent the SCoP into polyhedral model.  This was
required because each basic block was represented as independent basic block in
the polyhedral model. So all the cross-basic-block dependencies were translated
out-of-ssa.

With this patch those dependencies are also exposed to the ISL, so there is no
need to modify the original structure of the program.

After this patch we should be able to enable graphite at some default
optimization level.


Highlights:
Remove cross bb scalar to array translation
For reductions, add support for more than just INT_CST
Early bailout on codegen.
Verify loop-closed ssa structure during copy of renames
The uses of exprs should come from bb which dominates the bb
Collect the init value of close phi in loop-guard
Do not follow vuses for close-phi, postpone loop-close phi until the
 corresponding loop-phi is processed
Bail out if no bb found to place cond/loop -phis
Move insertion of liveouts at the end of codegen
Insert loop-phis in the loop-header.


This patch passes regtest and bootstrap with BOOT_CFLAGS='-O2 
-fgraphite-identity -floop-nest-optimize'




This patch has been committed, and caused PR68341 - 'FAIL: 
gcc.dg/graphite/interchange-{1,11,13}.c (internal compiler error)'


Thanks,
- Tom


Re: [PATCH] Remove first_pass_instance from pass_vrp

2015-11-14 Thread Tom de Vries

On 13/11/15 14:57, Tom de Vries wrote:

I've implemented the set_arg scenario, though I've renamed it to
set_pass_param. I've also added a parameter number argument to
set_pass_param.

Furthermore, I've included the gdbhooks.py update.

OK for trunk if bootstrap and reg-test passes?



Bootstrap and reg-test on x86_64 succeeded.

OK for trunk?

Thanks,
- Tom


Btw, I think
   NEXT_PASS (pass_vrp, false /* warn_array_bounds_p */);
is now equivalent to
   NEXT_PASS (pass_vrp);
I'm not sure which one I prefer in passes.def.

Thanks,
- Tom


0003-Remove-first_pass_instance-from-pass_vrp.patch


Remove first_pass_instance from pass_vrp

2015-11-13  Tom de Vries<t...@codesourcery.com>

* gdbhooks.py (class PassNames): Handle extra arg NEXT_PASS argument.
* gen-pass-instances.awk (handle_line): Same.
* pass_manager.h (class pass_manager): Define and undefine
NEXT_PASS_WITH_ARG.
* passes.c (opt_pass::set_pass_param): New function.
(pass_manager::pass_manager): Define and undefine NEXT_PASS_WITH_ARG.
* passes.def: Add extra arg to NEXT_PASS (pass_vrp).
* tree-pass.h (opt_pass::set_pass_param): Declare.
* tree-vrp.c (vrp_finalize, execute_vrp): Add and handle
warn_array_bounds_p parameter.
(pass_vrp::pass_vrp): Initialize warn_array_bounds_p.
(pass_vrp::set_pass_param): New function.
(pass_vrp::execute): Add warn_array_bounds_p arg to execute_vrp call.
(pass_vrp::warn_array_bounds_p): New private member.

---
  gcc/gdbhooks.py|  2 +-
  gcc/gen-pass-instances.awk | 28 +++-
  gcc/pass_manager.h |  2 ++
  gcc/passes.c   | 14 ++
  gcc/passes.def |  4 ++--
  gcc/tree-pass.h|  1 +
  gcc/tree-vrp.c | 20 ++--
  7 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/gcc/gdbhooks.py b/gcc/gdbhooks.py
index 2b9a94c..f920392 100644
--- a/gcc/gdbhooks.py
+++ b/gcc/gdbhooks.py
@@ -537,7 +537,7 @@ class PassNames:
  self.names = []
  with open(os.path.join(srcdir, 'passes.def')) as f:
  for line in f:
-m = re.match('\s*NEXT_PASS \((.+)\);', line)
+m = re.match('\s*NEXT_PASS \(([^,]+).*\);', line)
  if m:
  self.names.append(m.group(1))

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index 9cff429..106a2f6 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -61,12 +61,14 @@ function handle_line()
len_of_args = len_of_call - (len_of_start + len_of_close);
args_start_at = call_starts_at + len_of_start;
args_str = substr(line, args_start_at, len_of_args);
+   split(args_str, args, ",");

-   # Set pass_name argument
-   pass_name = args_str;
+   # Set pass_name argument, an optional with_arg argument
+   pass_name = args[1];
+   with_arg = args[2];

-   # Find call expression prefix (until and including called function)
-   len_of_prefix = args_start_at - 1 - len_of_open;
+   # Find call expression prefix
+   len_of_prefix = call_starts_at - 1;
prefix = substr(line, 1, len_of_prefix);

# Find call expression postfix
@@ -82,7 +84,23 @@ function handle_line()
pass_num = pass_counts[pass_name];

# Print call expression with extra pass_num argument
-   printf "%s(%s, %s)%s\n", prefix, pass_name, pass_num, postfix;
+   printf "%s", prefix;
+   if (with_arg)
+   {
+   printf "NEXT_PASS_WITH_ARG";
+   }
+   else
+   {
+   printf "NEXT_PASS";
+   }
+   printf " (";
+   printf "%s", pass_name;
+   printf ", %s", pass_num;
+   if (with_arg)
+   {
+   printf ", %s", with_arg;
+   }
+   printf ")%s\n", postfix;
  }

  { handle_line() }
diff --git a/gcc/pass_manager.h b/gcc/pass_manager.h
index 7d539e4..a8199e2 100644
--- a/gcc/pass_manager.h
+++ b/gcc/pass_manager.h
@@ -120,6 +120,7 @@ private:
  #define PUSH_INSERT_PASSES_WITHIN(PASS)
  #define POP_INSERT_PASSES()
  #define NEXT_PASS(PASS, NUM) opt_pass *PASS ## _ ## NUM
+#define NEXT_PASS_WITH_ARG(PASS, NUM, ARG) NEXT_PASS (PASS, NUM)
  #define TERMINATE_PASS_LIST()

  #include "pass-instances.def"
@@ -128,6 +129,7 @@ private:
  #undef PUSH_INSERT_PASSES_WITHIN
  #undef POP_INSERT_PASSES
  #undef NEXT_PASS
+#undef NEXT_PASS_WITH_ARG
  #undef TERMINATE_PASS_LIST

  }; // class pass_manager
diff --git a/gcc/passes.c b/gcc/passes.c
index dd8d00a..e634c5c 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -81,6 +81,13 @@ opt_pass::clone ()
internal_error ("pass %s does not support cloning", name);
  }

+void
+opt_pass::set_pass_param (unsigned int, bool)
+{
+  internal_error ("pass %s needs a

Re: [PATCH, 6/6] Remove first_pass_instance

2015-11-15 Thread Tom de Vries

On 15/11/15 16:24, David Malcolm wrote:

On Sun, 2015-11-15 at 12:08 +0100, Tom de Vries wrote:

On 15/11/15 11:55, Tom de Vries wrote:

[ was: Re: [PATCH] Remove first_pass_instance from pass_vrp ]

This patch series removes first_pass_instance.

   1Remove first_pass_instance from pass_vrp
   2Remove first_pass_instance from pass_reassoc
   3Remove first_pass_instance from pass_dominator
   4Remove first_pass_instance from pass_object_sizes
   5Remove first_pass_instance from pass_ccp
   6Remove first_pass_instance

Bootstrapped and reg-tested on x86_64.

I will post the individual patches in reply to this email.

[ I won't post the first patch though. It was already posted here:
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg01701.html . ]


this patch removes the variable first_pass_instance.


Can we also get rid of TODO_mark_first_instance, or would that be a
followup?


TODO_mark_first_instance is used in position_pass, which AFAIU is used 
in the plugin infrastructure. I'm not familiar with the plugin 
infrastructure and concepts, so I can't say anything sensible about 
whether we can get rid of the flag.


Thanks,
- Tom


[PATCH, 0/6] Remove first_pass_instance

2015-11-15 Thread Tom de Vries

[ was: Re: [PATCH] Remove first_pass_instance from pass_vrp ]

This patch series removes first_pass_instance.

 1  Remove first_pass_instance from pass_vrp
 2  Remove first_pass_instance from pass_reassoc
 3  Remove first_pass_instance from pass_dominator
 4  Remove first_pass_instance from pass_object_sizes
 5  Remove first_pass_instance from pass_ccp
 6  Remove first_pass_instance

Bootstrapped and reg-tested on x86_64.

I will post the individual patches in reply to this email.

[ I won't post the first patch though. It was already posted here: 
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg01701.html . ]


Thanks,
- Tom


[PATCH, 2/6] Remove first_pass_instance from pass_reassoc

2015-11-15 Thread Tom de Vries

On 15/11/15 11:55, Tom de Vries wrote:

[ was: Re: [PATCH] Remove first_pass_instance from pass_vrp ]

This patch series removes first_pass_instance.

  1Remove first_pass_instance from pass_vrp
  2Remove first_pass_instance from pass_reassoc
  3Remove first_pass_instance from pass_dominator
  4Remove first_pass_instance from pass_object_sizes
  5Remove first_pass_instance from pass_ccp
  6Remove first_pass_instance

Bootstrapped and reg-tested on x86_64.

I will post the individual patches in reply to this email.

[ I won't post the first patch though. It was already posted here:
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg01701.html . ]


this patch removes first_pass_instance from pass_reassoc.

Thanks,
- Tom

Remove first_pass_instance from pass_reassoc

2015-11-15  Tom de Vries  <t...@codesourcery.com>

	* passes.def: Add arg to pass_reassoc pass instantiation.
	* tree-ssa-reassoc.c (reassoc_insert_powi_p): New static variable.
	(acceptable_pow_call, reassociate_bb): Use reassoc_insert_powi_p instead
	of first_pass_instance.
	(execute_reassoc): Add and handle insert_powi_p parameter.
	(pass_reassoc::insert_powi_p): New private member.
	(pass_reassoc::pass_reassoc): Initialize insert_powi_p.
	(pass_reassoc::set_pass_param): New member function.  Set insert_powi_p.
	(pass_reassoc::execute): Call execute_reassoc with extra arg.

---
 gcc/passes.def |  4 ++--
 gcc/tree-ssa-reassoc.c | 28 ++--
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index 64c1fa1..78fdf0f 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -205,7 +205,7 @@ along with GCC; see the file COPYING3.  If not see
 	 opportunities.  */
   NEXT_PASS (pass_phi_only_cprop);
   NEXT_PASS (pass_dse);
-  NEXT_PASS (pass_reassoc);
+  NEXT_PASS (pass_reassoc, true /* insert_powi_p */);
   NEXT_PASS (pass_dce);
   NEXT_PASS (pass_forwprop);
   NEXT_PASS (pass_phiopt);
@@ -276,7 +276,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_lower_vector_ssa);
   NEXT_PASS (pass_split_paths);
   NEXT_PASS (pass_cse_reciprocals);
-  NEXT_PASS (pass_reassoc);
+  NEXT_PASS (pass_reassoc, false /* insert_powi_p */);
   NEXT_PASS (pass_strength_reduction);
   NEXT_PASS (pass_tracer);
   NEXT_PASS (pass_dominator);
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index a75290c..6b08a59 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -172,6 +172,9 @@ along with GCC; see the file COPYING3.  If not see
 destructive update for the associating op, and keep the destructive
 update together for vector sum reduction recognition.  */
 
+/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
+   point 3a in the pass header comment.  */
+static bool reassoc_insert_powi_p;
 
 /* Statistics */
 static struct
@@ -3940,7 +3943,7 @@ acceptable_pow_call (gimple *stmt, tree *base, HOST_WIDE_INT *exponent)
   tree fndecl, arg1;
   REAL_VALUE_TYPE c, cint;
 
-  if (!first_pass_instance
+  if (!reassoc_insert_powi_p
   || !flag_unsafe_math_optimizations
   || !is_gimple_call (stmt)
   || !has_single_use (gimple_call_lhs (stmt)))
@@ -4856,7 +4859,7 @@ reassociate_bb (basic_block bb)
 	  if (rhs_code == MULT_EXPR)
 		attempt_builtin_copysign ();
 
-	  if (first_pass_instance
+	  if (reassoc_insert_powi_p
 		  && rhs_code == MULT_EXPR
 		  && flag_unsafe_math_optimizations)
 		powi_result = attempt_builtin_powi (stmt, );
@@ -5111,11 +5114,14 @@ fini_reassoc (void)
   loop_optimizer_finalize ();
 }
 
-/* Gate and execute functions for Reassociation.  */
+/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
+   insertion of __builtin_powi calls.  */
 
 static unsigned int
-execute_reassoc (void)
+execute_reassoc (bool insert_powi_p)
 {
+  reassoc_insert_powi_p = insert_powi_p;
+
   init_reassoc ();
 
   do_reassoc ();
@@ -5145,14 +5151,24 @@ class pass_reassoc : public gimple_opt_pass
 {
 public:
   pass_reassoc (gcc::context *ctxt)
-: gimple_opt_pass (pass_data_reassoc, ctxt)
+: gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
   {}
 
   /* opt_pass methods: */
   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
+  void set_pass_param (unsigned int n, bool param)
+{
+  gcc_assert (n == 0);
+  insert_powi_p = param;
+}
   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
-  virtual unsigned int execute (function *) { return execute_reassoc (); }
+  virtual unsigned int execute (function *)
+{ return execute_reassoc (insert_powi_p); }
 
+ private:
+  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
+ point 3a in the pass header comment.  */
+  bool insert_powi_p;
 }; // class pass_reassoc
 
 } // anon namespace


[PATCH, 3/6] Remove first_pass_instance from pass_dominator

2015-11-15 Thread Tom de Vries

On 15/11/15 11:55, Tom de Vries wrote:

[ was: Re: [PATCH] Remove first_pass_instance from pass_vrp ]

This patch series removes first_pass_instance.

  1Remove first_pass_instance from pass_vrp
  2Remove first_pass_instance from pass_reassoc
  3Remove first_pass_instance from pass_dominator
  4Remove first_pass_instance from pass_object_sizes
  5Remove first_pass_instance from pass_ccp
  6Remove first_pass_instance

Bootstrapped and reg-tested on x86_64.

I will post the individual patches in reply to this email.

[ I won't post the first patch though. It was already posted here:
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg01701.html . ]


this patch removes first_pass_instance from pass_dominator.

Thanks,
- Tom

Remove first_pass_instance from pass_dominator

2015-11-15  Tom de Vries  <t...@codesourcery.com>

	* passes.def: Add arg to pass_dominator pass instantiation.
	* tree-pass.h (first_pass_instance): Remove pass_dominator-related bit
	of comment.
	* tree-ssa-dom.c (pass_dominator::pass_dominator): Initialize
	may_peel_loop_headers_p.
	(pass_dominator::set_pass_param): New member function.  Set
	may_peel_loop_headers_p.
	(pass_dominator::may_peel_loop_headers_p): New private member.
	(pass_dominator::execute): Use may_peel_loop_headers_p instead of
	first_pass_instance.

---
 gcc/passes.def |  4 ++--
 gcc/tree-pass.h|  7 ++-
 gcc/tree-ssa-dom.c | 16 ++--
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index 78fdf0f..d274a95 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -190,7 +190,7 @@ along with GCC; see the file COPYING3.  If not see
 	 propagations have already run, but before some more dead code
 	 is removed, and this place fits nicely.  Remember this when
 	 trying to move or duplicate pass_dominator somewhere earlier.  */
-  NEXT_PASS (pass_dominator);
+  NEXT_PASS (pass_dominator, true /* may_peel_loop_headers_p */);
   /* At this point the majority of const/copy propagations
 	 are exposed.  Go ahead and identify paths that should never
 	 be executed in a conforming program and isolate those paths.
@@ -279,7 +279,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_reassoc, false /* insert_powi_p */);
   NEXT_PASS (pass_strength_reduction);
   NEXT_PASS (pass_tracer);
-  NEXT_PASS (pass_dominator);
+  NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */);
   NEXT_PASS (pass_strlen);
   NEXT_PASS (pass_vrp, false /* warn_array_bounds_p */);
   /* The only const/copy propagation opportunities left after
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index a672d52..d647e73 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -631,11 +631,8 @@ extern bool function_called_by_processed_nodes_p (void);
 
 /* Set to true if the pass is called the first time during compilation of the
current function.  Note that using this information in the optimization
-   passes is considered not to be clean, and it should be avoided if possible.
-   This flag is currently used to prevent loops from being peeled repeatedly
-   in jump threading; it will be removed once we preserve loop structures
-   throughout the compilation -- we will be able to mark the affected loops
-   directly in jump threading, and avoid peeling them next time.  */
+   passes is considered not to be clean, and it should be avoided if
+   possible.  */
 extern bool first_pass_instance;
 
 /* Declare for plugins.  */
diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index 5cb2644..aeb726c 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -536,14 +536,26 @@ class pass_dominator : public gimple_opt_pass
 {
 public:
   pass_dominator (gcc::context *ctxt)
-: gimple_opt_pass (pass_data_dominator, ctxt)
+: gimple_opt_pass (pass_data_dominator, ctxt),
+  may_peel_loop_headers_p (false)
   {}
 
   /* opt_pass methods: */
   opt_pass * clone () { return new pass_dominator (m_ctxt); }
+  void set_pass_param (unsigned int n, bool param)
+{
+  gcc_assert (n == 0);
+  may_peel_loop_headers_p = param;
+}
   virtual bool gate (function *) { return flag_tree_dom != 0; }
   virtual unsigned int execute (function *);
 
+ private:
+  /* This flag is used to prevent loops from being peeled repeatedly in jump
+ threading; it will be removed once we preserve loop structures throughout
+ the compilation -- we will be able to mark the affected loops directly in
+ jump threading, and avoid peeling them next time.  */
+  bool may_peel_loop_headers_p;
 }; // class pass_dominator
 
 unsigned int
@@ -619,7 +631,7 @@ pass_dominator::execute (function *fun)
   free_all_edge_infos ();
 
   /* Thread jumps, creating duplicate blocks as needed.  */
-  cfg_altered |= thread_through_all_blocks (first_pass_instance);
+  cfg_altered |= thread_through_all_blocks (may_peel_loop_headers_p);
 
   if (cfg_a

[PATCH, 4/6] Remove first_pass_instance from pass_object_sizes

2015-11-15 Thread Tom de Vries

On 15/11/15 11:55, Tom de Vries wrote:

[ was: Re: [PATCH] Remove first_pass_instance from pass_vrp ]

This patch series removes first_pass_instance.

  1Remove first_pass_instance from pass_vrp
  2Remove first_pass_instance from pass_reassoc
  3Remove first_pass_instance from pass_dominator
  4Remove first_pass_instance from pass_object_sizes
  5Remove first_pass_instance from pass_ccp
  6Remove first_pass_instance

Bootstrapped and reg-tested on x86_64.

I will post the individual patches in reply to this email.

[ I won't post the first patch though. It was already posted here:
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg01701.html . ]


this patch removes first_pass_instance from pass_object_sizes.

Thanks,
- Tom

Remove first_pass_instance from pass_object_sizes

2015-11-15  Tom de Vries  <t...@codesourcery.com>

	* passes.def: Add arg to pass_object_sizes pass instantiation.
	* tree-object-size.c (pass_object_sizes::pass_object_sizes): Initialize
	insert_min_max_p.
	(pass_object_sizes::set_pass_param): New member function.  Set
	insert_min_max_p.
	(pass_object_sizes::insert_min_max_p): New private member.
	(pass_object_sizes::execute): Use insert_min_max_p instead of
	first_pass_instance.

---
 gcc/passes.def |  4 ++--
 gcc/tree-object-size.c | 14 +++---
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index d274a95..64883a7 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -77,7 +77,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_all_early_optimizations);
   PUSH_INSERT_PASSES_WITHIN (pass_all_early_optimizations)
 	  NEXT_PASS (pass_remove_cgraph_callee_edges);
-	  NEXT_PASS (pass_object_sizes);
+	  NEXT_PASS (pass_object_sizes, true /* insert_min_max_p */);
 	  NEXT_PASS (pass_ccp);
 	  /* After CCP we rewrite no longer addressed locals into SSA
 	 form if possible.  */
@@ -164,7 +164,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_backprop);
   NEXT_PASS (pass_phiprop);
   NEXT_PASS (pass_forwprop);
-  NEXT_PASS (pass_object_sizes);
+  NEXT_PASS (pass_object_sizes, false /* insert_min_max_p */);
   /* pass_build_alias is a dummy pass that ensures that we
 	 execute TODO_rebuild_alias at this point.  */
   NEXT_PASS (pass_build_alias);
diff --git a/gcc/tree-object-size.c b/gcc/tree-object-size.c
index fa3625c..459e65d 100644
--- a/gcc/tree-object-size.c
+++ b/gcc/tree-object-size.c
@@ -1217,13 +1217,21 @@ class pass_object_sizes : public gimple_opt_pass
 {
 public:
   pass_object_sizes (gcc::context *ctxt)
-: gimple_opt_pass (pass_data_object_sizes, ctxt)
+: gimple_opt_pass (pass_data_object_sizes, ctxt), insert_min_max_p (false)
   {}
 
   /* opt_pass methods: */
   opt_pass * clone () { return new pass_object_sizes (m_ctxt); }
+  void set_pass_param (unsigned int n, bool param)
+{
+  gcc_assert (n == 0);
+  insert_min_max_p = param;
+}
   virtual unsigned int execute (function *);
 
+ private:
+  /* Determines whether the pass instance creates MIN/MAX_EXPRs.  */
+  bool insert_min_max_p;
 }; // class pass_object_sizes
 
 /* Dummy valueize function.  */
@@ -1250,12 +1258,12 @@ pass_object_sizes::execute (function *fun)
 
 	  init_object_sizes ();
 
-	  /* In the first pass instance, only attempt to fold
+	  /* If insert_min_max_p, only attempt to fold
 	 __builtin_object_size (x, 1) and __builtin_object_size (x, 3),
 	 and rather than folding the builtin to the constant if any,
 	 create a MIN_EXPR or MAX_EXPR of the __builtin_object_size
 	 call result and the computed constant.  */
-	  if (first_pass_instance)
+	  if (insert_min_max_p)
 	{
 	  tree ost = gimple_call_arg (call, 1);
 	  if (tree_fits_uhwi_p (ost))


[PATCH, 6/6] Remove first_pass_instance

2015-11-15 Thread Tom de Vries

On 15/11/15 11:55, Tom de Vries wrote:

[ was: Re: [PATCH] Remove first_pass_instance from pass_vrp ]

This patch series removes first_pass_instance.

  1Remove first_pass_instance from pass_vrp
  2Remove first_pass_instance from pass_reassoc
  3Remove first_pass_instance from pass_dominator
  4Remove first_pass_instance from pass_object_sizes
  5Remove first_pass_instance from pass_ccp
  6Remove first_pass_instance

Bootstrapped and reg-tested on x86_64.

I will post the individual patches in reply to this email.

[ I won't post the first patch though. It was already posted here:
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg01701.html . ]


this patch removes the variable first_pass_instance.

Thanks,
- Tom

Remove first_pass_instance

2015-11-15  Tom de Vries  <t...@codesourcery.com>

	* passes.c (first_pass_instance): Remove variable.
	(execute_todo): Remove setting of first_pass_instance.
	* tree-pass.h (first_pass_instance): Remove declaration.

---
 gcc/passes.c| 4 
 gcc/tree-pass.h | 6 --
 2 files changed, 10 deletions(-)

diff --git a/gcc/passes.c b/gcc/passes.c
index e634c5c..0e23dcb 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -151,7 +151,6 @@ debug_pass (void)
 
 /* Global variables used to communicate with passes.  */
 bool in_gimple_form;
-bool first_pass_instance;
 
 
 /* This is called from various places for FUNCTION_DECL, VAR_DECL,
@@ -2005,9 +2004,6 @@ execute_todo (unsigned int flags)
 
   timevar_push (TV_TODO);
 
-  /* Inform the pass whether it is the first time it is run.  */
-  first_pass_instance = (flags & TODO_mark_first_instance) != 0;
-
   statistics_fini_pass ();
 
   if (flags)
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index d647e73..dcd2d5e 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -629,12 +629,6 @@ extern void ipa_read_optimization_summaries (void);
 extern void register_one_dump_file (opt_pass *);
 extern bool function_called_by_processed_nodes_p (void);
 
-/* Set to true if the pass is called the first time during compilation of the
-   current function.  Note that using this information in the optimization
-   passes is considered not to be clean, and it should be avoided if
-   possible.  */
-extern bool first_pass_instance;
-
 /* Declare for plugins.  */
 extern void do_per_function_toporder (void (*) (function *, void *), void *);
 


[PATCH, 5/6] Remove first_pass_instance from pass_ccp

2015-11-15 Thread Tom de Vries

On 15/11/15 11:55, Tom de Vries wrote:

[ was: Re: [PATCH] Remove first_pass_instance from pass_vrp ]

This patch series removes first_pass_instance.

  1Remove first_pass_instance from pass_vrp
  2Remove first_pass_instance from pass_reassoc
  3Remove first_pass_instance from pass_dominator
  4Remove first_pass_instance from pass_object_sizes
  5Remove first_pass_instance from pass_ccp
  6Remove first_pass_instance

Bootstrapped and reg-tested on x86_64.

I will post the individual patches in reply to this email.

[ I won't post the first patch though. It was already posted here:
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg01701.html . ]


this patch removes first_pass_instance from pass_ccp.

Thanks,
- Tom

Remove first_pass_instance from pass_ccp

2015-11-15  Tom de Vries  <t...@codesourcery.com>

	* passes.def: Add arg to pass_ccp pass instantiation.
	* tree-ssa-ccp.c (ccp_finalize): Add param nonzero_p.  Use nonzero_p
	instead of first_pass_instance.
	(do_ssa_ccp): Add and handle param nonzero_p.
	(pass_ccp::pass_ccp): Initialize nonzero_p.
	(pass_ccp::set_pass_param): New member function.  Set nonzero_p.
	(pass_ccp::execute): Call do_ssa_ccp with extra arg.
	(pass_ccp::nonzero_p): New private member.

---
 gcc/passes.def | 10 ++
 gcc/tree-ssa-ccp.c | 27 +--
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index 64883a7..17027786 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -78,7 +78,9 @@ along with GCC; see the file COPYING3.  If not see
   PUSH_INSERT_PASSES_WITHIN (pass_all_early_optimizations)
 	  NEXT_PASS (pass_remove_cgraph_callee_edges);
 	  NEXT_PASS (pass_object_sizes, true /* insert_min_max_p */);
-	  NEXT_PASS (pass_ccp);
+	  /* Don't record nonzero bits before IPA to avoid
+	 using too much memory.  */
+	  NEXT_PASS (pass_ccp, false /* nonzero_p */);
 	  /* After CCP we rewrite no longer addressed locals into SSA
 	 form if possible.  */
 	  NEXT_PASS (pass_forwprop);
@@ -157,7 +159,7 @@ along with GCC; see the file COPYING3.  If not see
   /* Initial scalar cleanups before alias computation.
 	 They ensure memory accesses are not indirect wherever possible.  */
   NEXT_PASS (pass_strip_predict_hints);
-  NEXT_PASS (pass_ccp);
+  NEXT_PASS (pass_ccp, true /* nonzero_p */);
   /* After CCP we rewrite no longer addressed locals into SSA
 	 form if possible.  */
   NEXT_PASS (pass_complete_unrolli);
@@ -209,7 +211,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_dce);
   NEXT_PASS (pass_forwprop);
   NEXT_PASS (pass_phiopt);
-  NEXT_PASS (pass_ccp);
+  NEXT_PASS (pass_ccp, true /* nonzero_p */);
   /* After CCP we rewrite no longer addressed locals into SSA
 	 form if possible.  */
   NEXT_PASS (pass_cse_sincos);
@@ -319,7 +321,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_lower_complex);
   NEXT_PASS (pass_lower_vector_ssa);
   /* Perform simple scalar cleanup which is constant/copy propagation.  */
-  NEXT_PASS (pass_ccp);
+  NEXT_PASS (pass_ccp, true /* nonzero_p */);
   NEXT_PASS (pass_object_sizes);
   /* Fold remaining builtins.  */
   NEXT_PASS (pass_fold_builtins);
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index d09fab1..b845a7b 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -886,12 +886,12 @@ do_dbg_cnt (void)
 
 
 /* Do final substitution of propagated values, cleanup the flowgraph and
-   free allocated storage.
+   free allocated storage.  If NONZERO_P, record nonzero bits.
 
Return TRUE when something was optimized.  */
 
 static bool
-ccp_finalize (void)
+ccp_finalize (bool nonzero_p)
 {
   bool something_changed;
   unsigned i;
@@ -910,9 +910,7 @@ ccp_finalize (void)
   if (!name
 	  || (!POINTER_TYPE_P (TREE_TYPE (name))
 	  && (!INTEGRAL_TYPE_P (TREE_TYPE (name))
-		  /* Don't record nonzero bits before IPA to avoid
-		 using too much memory.  */
-		  || first_pass_instance)))
+		  || !nonzero_p)))
 	continue;
 
   val = get_value (name);
@@ -2394,16 +2392,17 @@ ccp_visit_stmt (gimple *stmt, edge *taken_edge_p, tree *output_p)
 }
 
 
-/* Main entry point for SSA Conditional Constant Propagation.  */
+/* Main entry point for SSA Conditional Constant Propagation.  If NONZERO_P,
+   record nonzero bits.  */
 
 static unsigned int
-do_ssa_ccp (void)
+do_ssa_ccp (bool nonzero_p)
 {
   unsigned int todo = 0;
   calculate_dominance_info (CDI_DOMINATORS);
   ccp_initialize ();
   ssa_propagate (ccp_visit_stmt, ccp_visit_phi_node);
-  if (ccp_finalize ())
+  if (ccp_finalize (nonzero_p))
 todo = (TODO_cleanup_cfg | TODO_update_ssa);
   free_dominance_info (CDI_DOMINATORS);
   return todo;
@@ -2429,14 +2428,22 @@ class pass_ccp : public gimple_opt_pass
 {
 public:
   pass_ccp (gcc::context *ctxt)
-: gimple_opt_pass (pass_

Re: [PATCH, 7/16] Add pass_dominator_oacc_kernels

2015-11-16 Thread Tom de Vries

On 11/11/15 12:05, Richard Biener wrote:

On Mon, 9 Nov 2015, Tom de Vries wrote:


On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

   1Insert new exit block only when needed in
  transform_to_exit_first_loop_alt
   2Make create_parallel_loop return void
   3Ignore reduction clause on kernels directive
   4Implement -foffload-alias
   5Add in_oacc_kernels_region in struct loop
   6Add pass_oacc_kernels
   7Add pass_dominator_oacc_kernels
   8Add pass_ch_oacc_kernels
   9Add pass_parallelize_loops_oacc_kernels
  10Add pass_oacc_kernels pass group in passes.def
  11Update testcases after adding kernels pass group
  12Handle acc loop directive
  13Add c-c++-common/goacc/kernels-*.c
  14Add gfortran.dg/goacc/kernels-*.f95
  15Add libgomp.oacc-c-c++-common/kernels-*.c
  16Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.


this patch adds pass_dominator_oacc_kernels (which we may as well call
pass_dominator_no_peel_loop_headers, since it doesn't do anything
oacc-kernels-specific), to be used in the kernels pass group.

The reason I'm adding a new pass instead of using pass_dominator is that
pass_dominator uses first_pass_instance. So adding a pass_dominator instance A
before a pass_dominator instance B has the unexpected consequence that it may
change the behaviour of instance B. I've filed PR68247 - "Remove
pass_first_instance" to note this issue.


This looks ok (minus my comments to patch #10)



AFAIU, if "Remove first_pass_instance from pass_dominator" gets approved 
and committed, we can drop this patch, and use this pass instantiation 
instead in the oacc_kernels pass group:

...
  NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */);
...

Thanks,
- Tom


Re: [PATCH, 0/6] Remove first_pass_instance

2015-11-16 Thread Tom de Vries

On 16/11/15 13:09, Bernd Schmidt wrote:

On 11/15/2015 11:55 AM, Tom de Vries wrote:

[ was: Re: [PATCH] Remove first_pass_instance from pass_vrp ]

This patch series removes first_pass_instance.

  1Remove first_pass_instance from pass_vrp
  2Remove first_pass_instance from pass_reassoc
  3Remove first_pass_instance from pass_dominator
  4Remove first_pass_instance from pass_object_sizes
  5Remove first_pass_instance from pass_ccp
  6Remove first_pass_instance


In 5/6 please retain the comment about memory usage.


[ FWIW, I moved that comment to passes.def, since I thought it was more 
appropriate there. ]


Done, committed as attached.

Thanks,
- Tom
Remove first_pass_instance from pass_ccp

2015-11-15  Tom de Vries  <t...@codesourcery.com>

	* passes.def: Add arg to pass_ccp pass instantiation.
	* tree-ssa-ccp.c (ccp_finalize): Add param nonzero_p.  Use nonzero_p
	instead of first_pass_instance.
	(do_ssa_ccp): Add and handle param nonzero_p.
	(pass_ccp::pass_ccp): Initialize nonzero_p.
	(pass_ccp::set_pass_param): New member function.  Set nonzero_p.
	(pass_ccp::execute): Call do_ssa_ccp with extra arg.
	(pass_ccp::nonzero_p): New private member.

---
 gcc/passes.def | 10 ++
 gcc/tree-ssa-ccp.c | 25 +
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index 64883a7..17027786 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -78,7 +78,9 @@ along with GCC; see the file COPYING3.  If not see
   PUSH_INSERT_PASSES_WITHIN (pass_all_early_optimizations)
 	  NEXT_PASS (pass_remove_cgraph_callee_edges);
 	  NEXT_PASS (pass_object_sizes, true /* insert_min_max_p */);
-	  NEXT_PASS (pass_ccp);
+	  /* Don't record nonzero bits before IPA to avoid
+	 using too much memory.  */
+	  NEXT_PASS (pass_ccp, false /* nonzero_p */);
 	  /* After CCP we rewrite no longer addressed locals into SSA
 	 form if possible.  */
 	  NEXT_PASS (pass_forwprop);
@@ -157,7 +159,7 @@ along with GCC; see the file COPYING3.  If not see
   /* Initial scalar cleanups before alias computation.
 	 They ensure memory accesses are not indirect wherever possible.  */
   NEXT_PASS (pass_strip_predict_hints);
-  NEXT_PASS (pass_ccp);
+  NEXT_PASS (pass_ccp, true /* nonzero_p */);
   /* After CCP we rewrite no longer addressed locals into SSA
 	 form if possible.  */
   NEXT_PASS (pass_complete_unrolli);
@@ -209,7 +211,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_dce);
   NEXT_PASS (pass_forwprop);
   NEXT_PASS (pass_phiopt);
-  NEXT_PASS (pass_ccp);
+  NEXT_PASS (pass_ccp, true /* nonzero_p */);
   /* After CCP we rewrite no longer addressed locals into SSA
 	 form if possible.  */
   NEXT_PASS (pass_cse_sincos);
@@ -319,7 +321,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_lower_complex);
   NEXT_PASS (pass_lower_vector_ssa);
   /* Perform simple scalar cleanup which is constant/copy propagation.  */
-  NEXT_PASS (pass_ccp);
+  NEXT_PASS (pass_ccp, true /* nonzero_p */);
   NEXT_PASS (pass_object_sizes);
   /* Fold remaining builtins.  */
   NEXT_PASS (pass_fold_builtins);
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index d09fab1..7b6b451 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -886,12 +886,12 @@ do_dbg_cnt (void)
 
 
 /* Do final substitution of propagated values, cleanup the flowgraph and
-   free allocated storage.
+   free allocated storage.  If NONZERO_P, record nonzero bits.
 
Return TRUE when something was optimized.  */
 
 static bool
-ccp_finalize (void)
+ccp_finalize (bool nonzero_p)
 {
   bool something_changed;
   unsigned i;
@@ -912,7 +912,7 @@ ccp_finalize (void)
 	  && (!INTEGRAL_TYPE_P (TREE_TYPE (name))
 		  /* Don't record nonzero bits before IPA to avoid
 		 using too much memory.  */
-		  || first_pass_instance)))
+		  || !nonzero_p)))
 	continue;
 
   val = get_value (name);
@@ -2394,16 +2394,17 @@ ccp_visit_stmt (gimple *stmt, edge *taken_edge_p, tree *output_p)
 }
 
 
-/* Main entry point for SSA Conditional Constant Propagation.  */
+/* Main entry point for SSA Conditional Constant Propagation.  If NONZERO_P,
+   record nonzero bits.  */
 
 static unsigned int
-do_ssa_ccp (void)
+do_ssa_ccp (bool nonzero_p)
 {
   unsigned int todo = 0;
   calculate_dominance_info (CDI_DOMINATORS);
   ccp_initialize ();
   ssa_propagate (ccp_visit_stmt, ccp_visit_phi_node);
-  if (ccp_finalize ())
+  if (ccp_finalize (nonzero_p))
 todo = (TODO_cleanup_cfg | TODO_update_ssa);
   free_dominance_info (CDI_DOMINATORS);
   return todo;
@@ -2429,14 +2430,22 @@ class pass_ccp : public gimple_opt_pass
 {
 public:
   pass_ccp (gcc::context *ctxt)
-: gimple_opt_pass (pass_data_ccp, ctxt)
+: gimple_opt_pass (pass_data_ccp, ctxt), nonzero_p (false)
   {}
 
   /* opt_pass methods: */
   op

[PATCH] Clear LOOP_CLOSED_SSA after pass_ccp

2015-11-16 Thread Tom de Vries

Hi,

while playing around with inserting pass_ccp here and there in the pass 
list, I put it after a pass where the loops state contained LOOP_CLOSED_SSA.


And apparently pass_ccp does not preserve loop-closed ssa.

As a consequence, during executing the pass_ccp todos, 
verify_loop_closed_ssa fails.


This patch fixes that by noting in pass_ccp that it does not preserve 
loop-closed ssa.


OK for trunk if bootstrap and reg-test succeed?

Thanks,
- Tom

Clear LOOP_CLOSED_SSA after pass_ccp

2015-11-16  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-ccp.c (do_ssa_ccp): Clear LOOP_CLOSED_SSA in loops state if
	something changed.

---
 gcc/tree-ssa-ccp.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 7b6b451..7e8bc52 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -139,6 +139,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "builtins.h"
 #include "tree-chkp.h"
+#include "cfgloop.h"
 
 
 /* Possible lattice values.  */
@@ -2402,10 +2403,17 @@ do_ssa_ccp (bool nonzero_p)
 {
   unsigned int todo = 0;
   calculate_dominance_info (CDI_DOMINATORS);
+
   ccp_initialize ();
   ssa_propagate (ccp_visit_stmt, ccp_visit_phi_node);
   if (ccp_finalize (nonzero_p))
-todo = (TODO_cleanup_cfg | TODO_update_ssa);
+{
+  todo = (TODO_cleanup_cfg | TODO_update_ssa);
+
+  /* ccp_finalize does not preserve loop-closed ssa.  */
+  loops_state_clear (LOOP_CLOSED_SSA);
+}
+
   free_dominance_info (CDI_DOMINATORS);
   return todo;
 }


[PATCH] Make fdump-tree-sccp-details more complete

2015-11-16 Thread Tom de Vries

Hi,

pass_scev_cprop contains a bit where it replaces uses of an ssa-name 
with constants.  This is currently not noted in the dump-file, even with 
TDF_DETAILS.


This patch adds that information in the dump-file, in this format:
...
Replacing uses of: D__lsm.10_34 with: 1
...

OK for trunk if bootstrap and reg-test succeed?

Thanks,
- Tom
Make fdump-tree-sccp-details more complete

2015-11-16  Tom de Vries  <t...@codesourcery.com>

	* tree-scalar-evolution.c (scev_const_prop): Dump details if replacing
	uses of ssa_name with constant.

---
 gcc/tree-scalar-evolution.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index e90aafb..27630f0 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -3465,7 +3465,17 @@ scev_const_prop (void)
 
 	  /* Replace the uses of the name.  */
 	  if (name != ev)
-	replace_uses_by (name, ev);
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+		{
+		  fprintf (dump_file, "Replacing uses of: ");
+		  print_generic_expr (dump_file, name, 0);
+		  fprintf (dump_file, " with: ");
+		  print_generic_expr (dump_file, ev, 0);
+		  fprintf (dump_file, "\n");
+		}
+	  replace_uses_by (name, ev);
+	}
 
 	  if (!ssa_names_to_remove)
 	ssa_names_to_remove = BITMAP_ALLOC (NULL);


Re: [PATCH, 10/16] Add pass_oacc_kernels pass group in passes.def

2015-11-16 Thread Tom de Vries

On 16/11/15 13:45, Richard Biener wrote:

+ NEXT_PASS (pass_scev_cprop);
> >
> >What's that for?  It's supposed to help removing loops - I don't
> >expect kernels to vanish.

>
>I'm using pass_scev_cprop for the "final value replacement" functionality.
>Added comment.



That functionality is intended to enable loop removal.


Let me try to explain in a bit more detail.


I.

Consider a parloops testcase test.c, with a use of the final value of 
the iteration variable (return i):

...
unsigned int
foo (int n, int *a)
{
  int i;
  for (i = 0; i < n; ++i)
a[i] = 1;

  return i;
}
...

Say we compile with:
...
$ gcc -S -O2 test.c -ftree-parallelize-loops=2 -fdump-tree-all-details
...

We can see here in the parloops dump-file that the loop was parallelized:
...
  SUCCESS: may be parallelized
...

Now say that we run with -fno-tree-scev-cprop in addition. Instead we 
find in the parloops dump-file:

...
phi is i_1 = PHI 
arg of phi to exit:   value i_10 used outside loop
  checking if it a part of reduction pattern:
  FAILED: it is not a part of reduction.
...

Auto-parallelization fails in this case because there is a loop exit phi 
(the one in bb 6 defining i_1) which is not part of a reduction:

...
  :
  # i_13 = PHI <0(3), i_10(5)>
  _5 = (long unsigned int) i_13;
  _6 = _5 * 4;
  _8 = a_7(D) + _6;
  *_8 = 1;
  i_10 = i_13 + 1;
  if (n_4(D) > i_10)
goto ;
  else
goto ;

  :
  goto ;

  :
  # i_1 = PHI 
  _20 = (unsigned int) i_1;
...

With -ftree-scev-cprop, we find in the pass_scev_cprop dump-file:
...
final value replacement:
  i_1 = PHI 
  with
  i_1 = n_4(D);
...

And the resulting loop no longer has any loop exit phis, so 
auto-parallelization succeeds:

...
  :
  # i_13 = PHI <0(3), i_10(5)>
  _5 = (long unsigned int) i_13;
  _6 = _5 * 4;
  _8 = a_7(D) + _6;
  *_8 = 1;
  i_10 = i_13 + 1;
  if (n_4(D) > i_10)
goto ;
  else
goto ;

  :
  goto ;

  :
  _20 = (unsigned int) n_4(D);
...

[ I've filed PR68373 - "autopar fails on loop exit phi with argument 
defined outside loop", for a slightly different testcase where despite 
the final value replacement autopar still fails. ]



II.

Now, back to oacc kernels.

Consider test-case kernels-loop-n.f95 (will add this one to the test-cases):
...
module test
contains
  subroutine foo(n)
implicit none
integer :: n
integer, dimension (0:n-1) :: a, b, c
integer:: i, ii
do i = 0, n - 1
   a(i) = i * 2
end do

do i = 0, n -1
   b(i) = i * 4
end do

!$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
do ii = 0, n - 1
   c(ii) = a(ii) + b(ii)
end do
!$acc end kernels

do i = 0, n - 1
   if (c(i) .ne. a(i) + b(i)) call abort
end do

  end subroutine foo
end module test
...

The loop at the start of the kernels pass group contains an in-memory 
iteration variable, with a store to '*_9 = _38'.

...
  :
  _13 = *.omp_data_i_4(D).c;
  c.21_14 = *_13;
  _16 = *_9;
  _17 = (integer(kind=8)) _16;
  _18 = *.omp_data_i_4(D).a;
  a.22_19 = *_18;
  _23 = MEM[(integer(kind=4)[0:D.3488] *)a.22_19][_17];
  _24 = *.omp_data_i_4(D).b;
  b.23_25 = *_24;
  _29 = MEM[(integer(kind=4)[0:D.3484] *)b.23_25][_17];
  _30 = _23 + _29;
  MEM[(integer(kind=4)[0:D.3480] *)c.21_14][_17] = _30;
  _38 = _16 + 1;
  *_9 = _38;
  if (_8 == _16)
goto ;
  else
goto ;
...

After pass_lim/pass_copy_prop, we've rewritten that into using a local 
iteration variable, but we've generated a read of the final value of the 
iteration variable outside the loop, which means auto-parallelization 
will fail:

...
  :
  # D__lsm.29_12 = PHI 
  _17 = (integer(kind=8)) D__lsm.29_12;
  _23 = MEM[(integer(kind=4)[0:D.3488] *)a.22_19][_17];
  _29 = MEM[(integer(kind=4)[0:D.3484] *)b.23_25][_17];
  _30 = _23 + _29;
  MEM[(integer(kind=4)[0:D.3480] *)c.21_14][_17] = _30;
  _38 = D__lsm.29_12 + 1;
  if (_8 == D__lsm.29_12)
goto ;
  else
goto ;

  :
  # D__lsm.29_27 = PHI <_38(5)>
  *_9 = D__lsm.29_27;
  goto ;

  :
  goto ;
...

This makes it similar to the parloops example above, and that's why I've 
added pass_scev_cprop in the kernels pass group.


[ And for some kernels test-cases with constant loop bound, it's not the 
final value replacement bit that does the substitution, but the first 
bit in scev_const_prop using resolve_mixers. So that's a related reason 
to use pass_scev_cprop. ]


Thanks,
- Tom


Re: [PATCH, 5/16] Add in_oacc_kernels_region in struct loop

2015-11-16 Thread Tom de Vries

On 11/11/15 11:55, Richard Biener wrote:

On Mon, 9 Nov 2015, Tom de Vries wrote:


On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

   1Insert new exit block only when needed in
  transform_to_exit_first_loop_alt
   2Make create_parallel_loop return void
   3Ignore reduction clause on kernels directive
   4Implement -foffload-alias
   5Add in_oacc_kernels_region in struct loop
   6Add pass_oacc_kernels
   7Add pass_dominator_oacc_kernels
   8Add pass_ch_oacc_kernels
   9Add pass_parallelize_loops_oacc_kernels
  10Add pass_oacc_kernels pass group in passes.def
  11Update testcases after adding kernels pass group
  12Handle acc loop directive
  13Add c-c++-common/goacc/kernels-*.c
  14Add gfortran.dg/goacc/kernels-*.f95
  15Add libgomp.oacc-c-c++-common/kernels-*.c
  16Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.


this patch adds and initializes the in_oacc_kernels_region field in
struct loop.

The field is used to signal to subsequent passes that we're dealing with a
loop in a kernels region that we're trying to parallelize.

Note that we do not parallelize kernels regions with more than one loop nest.
[ In general, kernels regions with more than one loop nest should be split up
into separate kernels regions, but that's not supported atm. ]


I think mark_loops_in_oacc_kernels_region can be greatly simplified.

Both region entry and exit should have the same ->loop_father (a SESE
region).  Then you can just walk that loop's inner loops (and their
siblings), checking their header domination relation with the region entry
and exit (only necessary for direct inner loops).


Updated patch to use the loops structure.  Atm I'm also skipping loops 
containing sibling loops, since I have no test-cases for that yet.


Thanks,
- Tom

Add in_oacc_kernels_region in struct loop

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* cfgloop.h (struct loop): Add in_oacc_kernels_region field.
	* omp-low.c (mark_loops_in_oacc_kernels_region): New function.
	(expand_omp_target): Call mark_loops_in_oacc_kernels_region.

---
 gcc/cfgloop.h |  3 +++
 gcc/omp-low.c | 43 +++
 2 files changed, 46 insertions(+)

diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 6af6893..ee73bf9 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -191,6 +191,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
   /* True if we should try harder to vectorize this loop.  */
   bool force_vectorize;
 
+  /* True if the loop is part of an oacc kernels region.  */
+  bool in_oacc_kernels_region;
+
   /* For SIMD loops, this is a unique identifier of the loop, referenced
  by IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LANE and IFN_GOMP_SIMD_LAST_LANE
  builtins.  */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 5f76434..fba7bbd 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12450,6 +12450,46 @@ get_oacc_ifn_dim_arg (const gimple *stmt)
   return (int) axis;
 }
 
+/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
+   at REGION_EXIT.  */
+
+static void
+mark_loops_in_oacc_kernels_region (basic_block region_entry,
+   basic_block region_exit)
+{
+  struct loop *outer = region_entry->loop_father;
+  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
+
+  /* Don't parallelize the kernels region if it contains more than one outer
+ loop.  */
+  unsigned int nr_outer_loops = 0;
+  struct loop *single_outer;
+  for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
+{
+  gcc_assert (loop_outer (loop) == outer);
+
+  if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
+	continue;
+
+  if (region_exit != NULL
+	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
+	continue;
+
+  nr_outer_loops++;
+  single_outer = loop;
+}
+  if (nr_outer_loops != 1)
+return;
+
+  for (struct loop *loop = single_outer->inner; loop != NULL; loop = loop->inner)
+if (loop->next)
+  return;
+
+  /* Mark the loops in the region.  */
+  for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
+loop->in_oacc_kernels_region = true;
+}
+
 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
 
 static void
@@ -1250

Re: [PATCH, 5/16] Add in_oacc_kernels_region in struct loop

2015-11-16 Thread Tom de Vries

On 11/11/15 11:55, Richard Biener wrote:

On Mon, 9 Nov 2015, Tom de Vries wrote:


On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

   1Insert new exit block only when needed in
  transform_to_exit_first_loop_alt
   2Make create_parallel_loop return void
   3Ignore reduction clause on kernels directive
   4Implement -foffload-alias
   5Add in_oacc_kernels_region in struct loop
   6Add pass_oacc_kernels
   7Add pass_dominator_oacc_kernels
   8Add pass_ch_oacc_kernels
   9Add pass_parallelize_loops_oacc_kernels
  10Add pass_oacc_kernels pass group in passes.def
  11Update testcases after adding kernels pass group
  12Handle acc loop directive
  13Add c-c++-common/goacc/kernels-*.c
  14Add gfortran.dg/goacc/kernels-*.f95
  15Add libgomp.oacc-c-c++-common/kernels-*.c
  16Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.


this patch adds and initializes the in_oacc_kernels_region field in
struct loop.

The field is used to signal to subsequent passes that we're dealing with a
loop in a kernels region that we're trying to parallelize.

Note that we do not parallelize kernels regions with more than one loop nest.
[ In general, kernels regions with more than one loop nest should be split up
into separate kernels regions, but that's not supported atm. ]


I think mark_loops_in_oacc_kernels_region can be greatly simplified.

Both region entry and exit should have the same ->loop_father (a SESE
region).  Then you can just walk that loop's inner loops (and their
siblings), checking their header domination relation with the region entry
and exit (only necessary for direct inner loops).


Updated patch to use the loops structure.  Atm I'm also skipping loops 
containing sibling loops, since I have no test-cases for that yet.


Thanks,
- Tom

Add in_oacc_kernels_region in struct loop

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* cfgloop.h (struct loop): Add in_oacc_kernels_region field.
	* omp-low.c (mark_loops_in_oacc_kernels_region): New function.
	(expand_omp_target): Call mark_loops_in_oacc_kernels_region.

---
 gcc/cfgloop.h |  3 +++
 gcc/omp-low.c | 43 +++
 2 files changed, 46 insertions(+)

diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 6af6893..ee73bf9 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -191,6 +191,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
   /* True if we should try harder to vectorize this loop.  */
   bool force_vectorize;
 
+  /* True if the loop is part of an oacc kernels region.  */
+  bool in_oacc_kernels_region;
+
   /* For SIMD loops, this is a unique identifier of the loop, referenced
  by IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LANE and IFN_GOMP_SIMD_LAST_LANE
  builtins.  */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 5f76434..fba7bbd 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12450,6 +12450,46 @@ get_oacc_ifn_dim_arg (const gimple *stmt)
   return (int) axis;
 }
 
+/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
+   at REGION_EXIT.  */
+
+static void
+mark_loops_in_oacc_kernels_region (basic_block region_entry,
+   basic_block region_exit)
+{
+  struct loop *outer = region_entry->loop_father;
+  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
+
+  /* Don't parallelize the kernels region if it contains more than one outer
+ loop.  */
+  unsigned int nr_outer_loops = 0;
+  struct loop *single_outer;
+  for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
+{
+  gcc_assert (loop_outer (loop) == outer);
+
+  if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
+	continue;
+
+  if (region_exit != NULL
+	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
+	continue;
+
+  nr_outer_loops++;
+  single_outer = loop;
+}
+  if (nr_outer_loops != 1)
+return;
+
+  for (struct loop *loop = single_outer->inner; loop != NULL; loop = loop->inner)
+if (loop->next)
+  return;
+
+  /* Mark the loops in the region.  */
+  for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
+loop->in_oacc_kernels_region = true;
+}
+
 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
 
 static void
@@ -1250

Re: [PATCH, 10/16] Add pass_oacc_kernels pass group in passes.def

2015-11-16 Thread Tom de Vries

On 11/11/15 12:02, Richard Biener wrote:

On Mon, 9 Nov 2015, Tom de Vries wrote:


On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

   1Insert new exit block only when needed in
  transform_to_exit_first_loop_alt
   2Make create_parallel_loop return void
   3Ignore reduction clause on kernels directive
   4Implement -foffload-alias
   5Add in_oacc_kernels_region in struct loop
   6Add pass_oacc_kernels
   7Add pass_dominator_oacc_kernels
   8Add pass_ch_oacc_kernels
   9Add pass_parallelize_loops_oacc_kernels
  10Add pass_oacc_kernels pass group in passes.def
  11Update testcases after adding kernels pass group
  12Handle acc loop directive
  13Add c-c++-common/goacc/kernels-*.c
  14Add gfortran.dg/goacc/kernels-*.f95
  15Add libgomp.oacc-c-c++-common/kernels-*.c
  16Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Build and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.



This patch adds the pass_oacc_kernels pass group to the pass list in
passes.def.

Note the repetition of pass_lim/pass_copy_prop. The first pair is for an inner
loop in a loop nest, the second for an outer loop in a loop nest.


@@ -86,6 +86,27 @@ along with GCC; see the file COPYING3.  If not see
   /* pass_build_ealias is a dummy pass that ensures that we
  execute TODO_rebuild_alias at this point.  */
   NEXT_PASS (pass_build_ealias);
+ /* Pass group that runs when there are oacc kernels in the
+function.  */
+ NEXT_PASS (pass_oacc_kernels);
+ PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
+ NEXT_PASS (pass_dominator_oacc_kernels);
+ NEXT_PASS (pass_ch_oacc_kernels);
+ NEXT_PASS (pass_dominator_oacc_kernels);
+ NEXT_PASS (pass_tree_loop_init);
+ NEXT_PASS (pass_lim);
+ NEXT_PASS (pass_copy_prop);
+ NEXT_PASS (pass_lim);
+ NEXT_PASS (pass_copy_prop);

iterate lim/copyprop twice?!  Why's that needed?



I've managed to eliminate the last pass_copy_prop, but not pass_lim. 
I've added a comment:

...
  /* We use pass_lim to rewrite in-memory iteration and reduction
 variable accesses in loops into local variables accesses.
     However, a single pass instantiation manages to do this only for
 one loop level, so we use pass_lim twice to at least be able to
 handle a loop nest with a depth of two.  */
  NEXT_PASS (pass_lim);
  NEXT_PASS (pass_copy_prop);
  NEXT_PASS (pass_lim);
...


+ NEXT_PASS (pass_scev_cprop);

What's that for?  It's supposed to help removing loops - I don't
expect kernels to vanish.


I'm using pass_scev_cprop for the "final value replacement" 
functionality. Added comment.




+ NEXT_PASS (pass_tree_loop_done);
+ NEXT_PASS (pass_dominator_oacc_kernels);

Three times DOM?  No please.  I wonder why you don't run oacc_kernels
after FRE and drop the initial DOM(s).



Done. There's just one pass_dominator_oacc_kernels left now.


+ NEXT_PASS (pass_dce);
+ NEXT_PASS (pass_tree_loop_init);
+ NEXT_PASS (pass_parallelize_loops_oacc_kernels);
+ NEXT_PASS (pass_expand_omp_ssa);
+ NEXT_PASS (pass_tree_loop_done);

The switches into/outof tree_loop also look odd to me, but well
(they'll be controlled by -ftree-loop-optimize)).



I've eliminated all the uses for pass_tree_loop_init/pass_tree_loop_done 
in the pass group. Instead, I've added conditional loop optimizer setup in:

-  pass_lim and pass_scev_cprop (added in this patch), and
- pass_parallelize_loops_oacc_kernels (added in patch "Add
  pass_parallelize_loops_oacc_kernels").

Thanks,
- Tom

Add pass_oacc_kernels pass group in passes.def

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (pass_expand_omp_ssa::clone): New function.
	* passes.def: Add pass_oacc_kernels pass group.
	* tree-ssa-loop-ch.c (pass_ch::clone): New function.
	* tree-ssa-loop-im.c (tree_ssa_lim): Allow to run outside
	pass_tree_loop.
	* tree-ssa-loop.c (pass_scev_cprop::clone): New function.
	(pass_scev_cprop::execute): Allow to run outside pass_tree_loop.

---
 gcc/omp-low.c  |  1 +
 gcc/passes.def | 25 +
 gcc/tree-ssa-loop-ch.c |  2 ++
 gcc/tree-ssa-loop-im.c | 14 ++
 g

Re: [PATCH, 9/16] Add pass_parallelize_loops_oacc_kernels

2015-11-16 Thread Tom de Vries

On 09/11/15 20:52, Tom de Vries wrote:

On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

  1Insert new exit block only when needed in
 transform_to_exit_first_loop_alt
  2Make create_parallel_loop return void
  3Ignore reduction clause on kernels directive
  4Implement -foffload-alias
  5Add in_oacc_kernels_region in struct loop
  6Add pass_oacc_kernels
  7Add pass_dominator_oacc_kernels
  8Add pass_ch_oacc_kernels
  9Add pass_parallelize_loops_oacc_kernels
 10Add pass_oacc_kernels pass group in passes.def
 11Update testcases after adding kernels pass group
 12Handle acc loop directive
 13Add c-c++-common/goacc/kernels-*.c
 14Add gfortran.dg/goacc/kernels-*.f95
 15Add libgomp.oacc-c-c++-common/kernels-*.c
 16Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Build and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.


This patch adds pass_parallelize_loops_oacc_kernels.

There's a number of things we do differently in parloops for oacc kernels:
- in normal parloops, we generate code to choose between a parallel
   version of the loop, and a sequential (low iteration count) version.
   Since the code in oacc kernels region is supposed to run on the
   accelerator anyway, we skip this check, and don't add a low iteration
   count loop.
- in normal parloops, we generate an #pragma omp parallel /
   GIMPLE_OMP_RETURN pair to delimit the region which we will split off
   into a thread function. Since the oacc kernels region is already
   split off, we don't add this pair.
- we indicate the parallelization factor by setting the oacc function
   attributes
- we generate an #pragma oacc loop instead of an #pragma omp for, and
   we add the gang clause
- in normal parloops, we rewrite the variable accesses in the loop
   into accesses relative to a thread function parameter. For the
   oacc kernels region, that rewrite has already been done at omp-lower,
   so we skip this.
- we need to ensure that the entire kernels region can be run in
   parallel. The loop independence check is already present, so for oacc
   kernels we add a check between blocks outside the loop and the entire
   region.
- we guard stores in the blocks outside the loop with gang_pos == 0.
   There's no need for each gang to write to a single location, we can
   do this in just one gang. (Typically this is the write of the final
   value of the iteration variable if that one is copied back to the
   host).



Reposting with loop optimizer init added in 
pass_parallelize_loops_oacc_kernels::execute.


Thanks,
- Tom
Add pass_parallelize_loops_oacc_kernels

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (set_oacc_fn_attrib): Make extern.
	* omp-low.c (expand_omp_atomic_fetch_op):  Release defs of update stmt.
	* omp-low.h (set_oacc_fn_attrib): Declare.
	* tree-parloops.c (struct reduction_info): Add reduc_addr field.
(create_call_for_reduction_1): Handle case that reduc_addr is non-NULL.
	(create_parallel_loop, gen_parallel_loop, try_create_reduction_list):
	Add and handle function parameter oacc_kernels_p.
	(get_omp_data_i_param): New function.
	(ref_conflicts_with_region, oacc_entry_exit_ok_1)
	(oacc_entry_exit_single_gang, oacc_entry_exit_ok): New function.
	(parallelize_loops): Add and handle function parameter oacc_kernels_p.
	Calculate dominance info.  Skip loops that are not in a kernels region
	in oacc_kernels_p mode.  Skip inner loops of parallelized loops.
	(pass_parallelize_loops::execute): Call parallelize_loops with false
	argument.
	(pass_data_parallelize_loops_oacc_kernels): New pass_data.
	(class pass_parallelize_loops_oacc_kernels): New pass.
	(pass_parallelize_loops_oacc_kernels::execute)
	(make_pass_parallelize_loops_oacc_kernels): New function.
	* tree-pass.h (make_pass_parallelize_loops_oacc_kernels): Declare.

---
 gcc/omp-low.c   |   8 +-
 gcc/omp-low.h   |   1 +
 gcc/tree-parloops.c | 693 +++-
 gcc/tree-pass.h |   2 +
 4 files changed, 640 insertions(+), 64 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index fba7bbd..9eae09a 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -11944,10 +11944,14 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
   gsi_remove (, true);
   gsi = gsi_l

Re: [PATCH, 6/6] Remove first_pass_instance

2015-11-15 Thread Tom de Vries

[ Adding CC Le-Chun Wu ]

On 15/11/15 17:22, Tom de Vries wrote:

On 15/11/15 16:24, David Malcolm wrote:

On Sun, 2015-11-15 at 12:08 +0100, Tom de Vries wrote:

On 15/11/15 11:55, Tom de Vries wrote:

[ was: Re: [PATCH] Remove first_pass_instance from pass_vrp ]

This patch series removes first_pass_instance.

   1Remove first_pass_instance from pass_vrp
   2Remove first_pass_instance from pass_reassoc
   3Remove first_pass_instance from pass_dominator
   4Remove first_pass_instance from pass_object_sizes
   5Remove first_pass_instance from pass_ccp
   6Remove first_pass_instance

Bootstrapped and reg-tested on x86_64.

I will post the individual patches in reply to this email.

[ I won't post the first patch though. It was already posted here:
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg01701.html . ]


this patch removes the variable first_pass_instance.


Can we also get rid of TODO_mark_first_instance, or would that be a
followup?


TODO_mark_first_instance is used in position_pass, which AFAIU is used
in the plugin infrastructure. I'm not familiar with the plugin
infrastructure and concepts, so I can't say anything sensible about
whether we can get rid of the flag.

Thanks,
- Tom




Re: [PATCH, 4/16] Implement -foffload-alias

2015-11-12 Thread Tom de Vries

On 11/11/15 12:00, Jakub Jelinek wrote:

On Wed, Nov 11, 2015 at 11:51:02AM +0100, Richard Biener wrote:

The option -foffload-alias=pointer instructs the compiler to assume that
objects referenced in an offload region do not alias.

The option -foffload-alias=all instructs the compiler to make no
assumptions about aliasing in offload regions.

The default value is -foffload-alias=none.


I think global options for this is nonsense.  Please follow what
we do for #pragma GCC ivdep for example, thus allow the alias
behavior to be specified per "region" (whatever makes sense here
in the context of offloading).


So, IIUC, instead of a global option foffload-alias, you're saying 
something like the following would be acceptable:

...
#pragma GCC offload-alias=
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
  {
#pragma acc loop
for (COUNTERTYPE ii = 0; ii < N; ii++)
  c[ii] = a[ii] + b[ii];
  }
...
?

I suppose that would work (though a global option would allow us to 
easily switch between none/pointer/all values in a large number of 
files, something that might be useful when f.i. running an openacc  test 
suite).



Yeah, completely agreed.  I don't see why the offloaded region would be in
any way special, they are C/C++/Fortran code as any other.
What we can and should improve is teach IPA aliasing/points to analysis
about the way we lower the host vs. offloading region boundary, so that
if alias analysis on the caller of GOMP_target_ext/GOACC_parallel_keyed
determines something it can be used on the offloaded function side and vice
versa,


I agree this would be a nice way to solve the aliasing info problem, but 
considering the remark of Richard at 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46032#c19 :

...
Not that I think IPA PTA is anywhere near production ready
...
I haven't considered proceeding in that direction.

Thanks,
- Tom


but a switch like the above is just wrong.




Re: [PATCH, 8/16] Add pass_ch_oacc_kernels

2015-11-11 Thread Tom de Vries

On 09/11/15 19:33, Tom de Vries wrote:

On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

  1Insert new exit block only when needed in
 transform_to_exit_first_loop_alt
  2Make create_parallel_loop return void
  3Ignore reduction clause on kernels directive
  4Implement -foffload-alias
  5Add in_oacc_kernels_region in struct loop
  6Add pass_oacc_kernels
  7Add pass_dominator_oacc_kernels
  8Add pass_ch_oacc_kernels
  9Add pass_parallelize_loops_oacc_kernels
 10Add pass_oacc_kernels pass group in passes.def
 11Update testcases after adding kernels pass group
 12Handle acc loop directive
 13Add c-c++-common/goacc/kernels-*.c
 14Add gfortran.dg/goacc/kernels-*.f95
 15Add libgomp.oacc-c-c++-common/kernels-*.c
 16Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Build and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.


this patch adds a pass pass_ch_oacc_kernels, which is like pass_ch, but
only runs for loops with oacc_kernels_region set.

[ But... thinking about it a bit more, I think that we could use a
regular pass_ch instead. We only use the kernels pass group for a single
loop nest in a kernels region, and we mark all the loops in the loop
nest with oacc_kernels_region. So I think that the oacc_kernels_region
test in pass_ch_oacc_kernels::process_loop_p evaluates to true. ]

So, I'll try to confirm with retesting that we can drop this patch.



That's confirmed. I can use pass_ch instead of pass_ch_oacc_kernels, so 
I'm dropping this patch from the series.


Thanks,
- Tom



[PATCH] Improve comments in pass_tree_loop_init::execute

2015-11-17 Thread Tom de Vries

Hi,

this no-functional-changes patch improves comments in 
pass_tree_loop_init::execute.


For the discussion related to the comment for scev_initialize, see:
- https://gcc.gnu.org/ml/gcc-patches/2013-02/msg01127.html
- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56426

OK for trunk?

Thanks,
- Tom
Improve comments in pass_tree_loop_init::execute

2015-11-17  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-loop.c (pass_tree_loop_init::execute): Improve comments.

---
 gcc/tree-ssa-loop.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 570406f..40df84f 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -276,12 +276,21 @@ public:
 unsigned int
 pass_tree_loop_init::execute (function *fun ATTRIBUTE_UNUSED)
 {
+  /* When processing a loop in the loop pipeline, we should be able to assert
+ that:
+   (loops_state_satisfies_p (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS
+	  | LOOP_CLOSED_SSA)
+	&& scev_initialized_p ())
+  */
+
   loop_optimizer_init (LOOPS_NORMAL
 		   | LOOPS_HAVE_RECORDED_EXITS);
   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
 
-  /* We might discover new loops, e.g. when turning irreducible
- regions into reducible.  */
+  /* Note that we run scev_initialize here even if number_of_loops () <= 1.
+ Even if we have no real loops now, we might discover new loops while
+ executing the loop pipeline, e.g. when turning irreducible regions into
+ reducible, in which case we still would need scev to be initialized.  */
   scev_initialize ();
 
   return 0;


Re: [PATCH, 10/16] Add pass_oacc_kernels pass group in passes.def

2015-11-17 Thread Tom de Vries

On 17/11/15 11:05, Richard Biener wrote:

On Tue, Nov 17, 2015 at 12:20 AM, Tom de Vries <tom_devr...@mentor.com> wrote:

On 16/11/15 13:45, Richard Biener wrote:


+ NEXT_PASS (pass_scev_cprop);


What's that for?  It's supposed to help removing loops - I don't
expect kernels to vanish.




I'm using pass_scev_cprop for the "final value replacement"
functionality.
Added comment.




That functionality is intended to enable loop removal.



Let me try to explain in a bit more detail.


I.

Consider a parloops testcase test.c, with a use of the final value of the
iteration variable (return i):
...
unsigned int
foo (int n, int *a)
{
   int i;
   for (i = 0; i < n; ++i)
 a[i] = 1;

   return i;
}
...

Say we compile with:
...
$ gcc -S -O2 test.c -ftree-parallelize-loops=2 -fdump-tree-all-details
...

We can see here in the parloops dump-file that the loop was parallelized:
...
   SUCCESS: may be parallelized
...

Now say that we run with -fno-tree-scev-cprop in addition. Instead we find
in the parloops dump-file:
...
phi is i_1 = PHI <i_10(4)>
arg of phi to exit:   value i_10 used outside loop
   checking if it a part of reduction pattern:
   FAILED: it is not a part of reduction.
...

Auto-parallelization fails in this case because there is a loop exit phi
(the one in bb 6 defining i_1) which is not part of a reduction:
...
   :
   # i_13 = PHI <0(3), i_10(5)>
   _5 = (long unsigned int) i_13;
   _6 = _5 * 4;
   _8 = a_7(D) + _6;
   *_8 = 1;
   i_10 = i_13 + 1;
   if (n_4(D) > i_10)
 goto ;
   else
 goto ;

   :
   goto ;

   :
   # i_1 = PHI <i_10(4)>
   _20 = (unsigned int) i_1;
...

With -ftree-scev-cprop, we find in the pass_scev_cprop dump-file:
...
final value replacement:
   i_1 = PHI <i_10(4)>
   with
   i_1 = n_4(D);
...

And the resulting loop no longer has any loop exit phis, so
auto-parallelization succeeds:
...
   :
   # i_13 = PHI <0(3), i_10(5)>
   _5 = (long unsigned int) i_13;
   _6 = _5 * 4;
   _8 = a_7(D) + _6;
   *_8 = 1;
   i_10 = i_13 + 1;
   if (n_4(D) > i_10)
 goto ;
   else
 goto ;

   :
   goto ;

   :
   _20 = (unsigned int) n_4(D);
...

[ I've filed PR68373 - "autopar fails on loop exit phi with argument defined
outside loop", for a slightly different testcase where despite the final
value replacement autopar still fails. ]


II.

Now, back to oacc kernels.

Consider test-case kernels-loop-n.f95 (will add this one to the test-cases):
...
module test
contains
   subroutine foo(n)
 implicit none
 integer :: n
 integer, dimension (0:n-1) :: a, b, c
 integer:: i, ii
 do i = 0, n - 1
a(i) = i * 2
 end do

 do i = 0, n -1
b(i) = i * 4
 end do

 !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
 do ii = 0, n - 1
c(ii) = a(ii) + b(ii)
 end do
 !$acc end kernels

 do i = 0, n - 1
if (c(i) .ne. a(i) + b(i)) call abort
 end do

   end subroutine foo
end module test
...

The loop at the start of the kernels pass group contains an in-memory
iteration variable, with a store to '*_9 = _38'.
...
   :
   _13 = *.omp_data_i_4(D).c;
   c.21_14 = *_13;
   _16 = *_9;
   _17 = (integer(kind=8)) _16;
   _18 = *.omp_data_i_4(D).a;
   a.22_19 = *_18;
   _23 = MEM[(integer(kind=4)[0:D.3488] *)a.22_19][_17];
   _24 = *.omp_data_i_4(D).b;
   b.23_25 = *_24;
   _29 = MEM[(integer(kind=4)[0:D.3484] *)b.23_25][_17];
   _30 = _23 + _29;
   MEM[(integer(kind=4)[0:D.3480] *)c.21_14][_17] = _30;
   _38 = _16 + 1;
   *_9 = _38;
   if (_8 == _16)
 goto ;
   else
 goto ;
...

After pass_lim/pass_copy_prop, we've rewritten that into using a local
iteration variable, but we've generated a read of the final value of the
iteration variable outside the loop, which means auto-parallelization will
fail:
...
   :
   # D__lsm.29_12 = PHI <D__lsm.29_15(4), _38(7)>
   _17 = (integer(kind=8)) D__lsm.29_12;
   _23 = MEM[(integer(kind=4)[0:D.3488] *)a.22_19][_17];
   _29 = MEM[(integer(kind=4)[0:D.3484] *)b.23_25][_17];
   _30 = _23 + _29;
   MEM[(integer(kind=4)[0:D.3480] *)c.21_14][_17] = _30;
   _38 = D__lsm.29_12 + 1;
   if (_8 == D__lsm.29_12)
 goto ;
   else
 goto ;

   :
   # D__lsm.29_27 = PHI <_38(5)>
   *_9 = D__lsm.29_27;
   goto ;


So this store is not actually necessary?


a.
In the case of this example, the store is dead.

There is a corresponding load at the point that we split off the region:
...
  :
  #pragma omp return

  :
  D.3635 = .omp_data_arr.25.ii;
  ii = *D.3635;
...

This load is later removed, given that ii is unused after the region. 
But once the region is split off,  there's nothing in the context of the 
store to suggest that it's dead.


And to get rid of the load of ii before the region is split off, we 
would have to implement some sort of liveness analysis on pre-ssa code.


b.
There's the case where there is an explicit use of ii after the region, 
in w

Re: [PATCH, 10/16] Add pass_oacc_kernels pass group in passes.def

2015-11-17 Thread Tom de Vries

On 17/11/15 16:18, Richard Biener wrote:

IMHO autopar needs to handle induction itself.

>
>I'm not sure what you mean. Could you elaborate?  Autopar handles induction
>variables, but it doesn't handle exit phis reading the final value of the
>induction variable. Is that what you want fixed? How?

Yes.  Perform final value replacement.



I see. Calling scev_const_prop in pass_parallelize_loops_oacc_kernels 
seems to work fine.


Doing the same for pass_parallelize_loops like this:
...
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 17415a8..d944395 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2787,6 +2787,9 @@ pass_parallelize_loops::execute (function *fun)
   if (number_of_loops (fun) <= 1)
 return 0;

+  unsigned int sccp_todo = scev_const_prop ();
+  gcc_assert (sccp_todo == 0);
+
   if (parallelize_loops ())
 {
   fun->curr_properties &= ~(PROP_gimple_eomp);
...
seems to fix PR 68373 - "autopar fails on loop exit phi with argument 
defined outside loop".


The new scev_const_prop call in autopar rewrites this phi into an 
assignment, and that allows parloops to succeed:

...
final value replacement:
  n_2 = PHI 
  with
  n_2 = n_4(D);
...

Thanks,
- Tom


[gomp4, committed] Remove TODO_remove_unused_locals from todo_flags_finish of pass_data_expand_omp_ssa

2015-11-09 Thread Tom de Vries

Hi,

this patch removes TODO_remove_unused_locals from todo_flags_finish of 
pass_data_expand_omp_ssa. I can't reproduce the problem 
TODO_remove_unused_locals is supposed to fix with the current state of 
gomp-4_0-branch.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Remove TODO_remove_unused_locals from todo_flags_finish of pass_data_expand_omp_ssa

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (pass_data_expand_omp_ssa): Remove TODO_remove_unused_locals
	from todo_flags_finish.
---
 gcc/omp-low.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index b78a8d6..4d6dc99 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -13641,8 +13641,7 @@ const pass_data pass_data_expand_omp_ssa =
   PROP_gimple_eomp, /* properties_provided */
   0, /* properties_destroyed */
   0, /* todo_flags_start */
-  TODO_cleanup_cfg | TODO_rebuild_alias
-  | TODO_remove_unused_locals, /* todo_flags_finish */
+  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
 };
 
 class pass_expand_omp_ssa : public gimple_opt_pass
-- 
1.9.1



[gomp4, committed] Provide PROP_gimple_eomp unconditionally in pass_expand_omp

2015-11-09 Thread Tom de Vries

Hi,

now that we don't postpone expanding the kernels region anymore, we can 
set PROP_gimple_eomp unconditionally.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Provide PROP_gimple_eomp unconditionally in pass_expand_omp

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (pass_data_expand_omp): Add PROP_gimple_eomp to
	properties_provided.
	(pass_expand_omp::execute): Don't set PROP_gimple_eomp.
---
 gcc/omp-low.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 4d6dc99..cd9c9e6 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -13589,7 +13589,7 @@ const pass_data pass_data_expand_omp =
   OPTGROUP_NONE, /* optinfo_flags */
   TV_NONE, /* tv_id */
   PROP_gimple_any, /* properties_required */
-  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
+  PROP_gimple_eomp, /* properties_provided */
   0, /* properties_destroyed */
   0, /* todo_flags_start */
   0, /* todo_flags_finish */
@@ -13603,14 +13603,12 @@ public:
   {}
 
   /* opt_pass methods: */
-  virtual unsigned int execute (function *fun)
+  virtual unsigned int execute (function *)
 {
   bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
 		|| flag_openmp_simd != 0)
 		   && !seen_error ());
 
-  fun->curr_properties |= PROP_gimple_eomp;
-
   /* This pass always runs, to provide PROP_gimple_eomp.
 	 But often, there is nothing to do.  */
   if (!gate)
-- 
1.9.1



[gomp4, committed] Remove ssa support in expand_omp_target

2015-11-09 Thread Tom de Vries

Hi,

now that we don't postpone expanding the kernels region anymore, we 
don't need ssa support in expand_omp_target.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Remove ssa support in expand_omp_target

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (release_first_vuse_in_edge_dest): Remove.
	(expand_omp_target): Remove ssa support.
---
 gcc/omp-low.c | 74 ---
 1 file changed, 5 insertions(+), 69 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index cd9c9e6..ed7640a 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -6973,38 +6973,6 @@ expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
 }
 }
 
-/* Release the first vuse in bb E->dest, either normal or phi arg for
-   edge E.  */
-
-static void
-release_first_vuse_in_edge_dest (edge e)
-{
-  gimple_stmt_iterator i;
-  basic_block bb = e->dest;
-
-  for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next ())
-{
-  gimple *phi = gsi_stmt (i);
-  tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
-
-  if (!virtual_operand_p (arg))
-	continue;
-
-  mark_virtual_operand_for_renaming (arg);
-  return;
-}
-
-  for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next_nondebug ())
-{
-  gimple *stmt = gsi_stmt (i);
-  if (gimple_vuse (stmt) == NULL_TREE)
-	continue;
-
-  mark_virtual_operand_for_renaming (gimple_vuse (stmt));
-  return;
-}
-}
-
 /* Expand the OpenMP parallel or task directive starting at REGION.  */
 
 static void
@@ -12727,6 +12695,7 @@ expand_omp_target (struct omp_region *region)
   /* Supported by expand_omp_taskreg, but not here.  */
   if (child_cfun != NULL)
 gcc_checking_assert (!child_cfun->cfg);
+  gcc_checking_assert (!gimple_in_ssa_p (cfun));
 
   entry_bb = region->entry;
   exit_bb = region->exit;
@@ -12735,7 +12704,7 @@ expand_omp_target (struct omp_region *region)
 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
 
   basic_block entry_succ_bb = single_succ (entry_bb);
-  if (offloaded && !gimple_in_ssa_p (cfun))
+  if (offloaded)
 {
   gsi = gsi_last_bb (entry_succ_bb);
   if (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ENTRY_END)
@@ -12793,25 +12762,8 @@ expand_omp_target (struct omp_region *region)
 	  gcc_assert (tgtcopy_stmt != NULL);
 	  arg = DECL_ARGUMENTS (child_fn);
 
-	  if (!gimple_in_ssa_p (cfun))
-	{
-	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
-	  gsi_remove (, true);
-	}
-	  else
-	{
-	  tree lhs = gimple_assign_lhs (tgtcopy_stmt);
-	  gcc_assert (SSA_NAME_VAR (lhs) == arg);
-	  /* We'd like to set the rhs to the default def in the child_fn,
-		 but it's too early to create ssa names in the child_fn.
-		 Instead, we set the rhs to the parm.  In
-		 move_sese_region_to_fn, we introduce a default def for the
-		 parm, map the parm to it's default def, and once we encounter
-		 this stmt, replace the parm with the default def.  */
-	  gimple_assign_set_rhs1 (tgtcopy_stmt, arg);
-	  gcc_assert (ssa_default_def (cfun, arg) == NULL);
-	  update_stmt (tgtcopy_stmt);
-	}
+	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
+	  gsi_remove (, true);
 	}
 
   /* Declare local variables needed in CHILD_CFUN.  */
@@ -12854,23 +12806,11 @@ expand_omp_target (struct omp_region *region)
 	  stmt = gimple_build_return (NULL);
 	  gsi_insert_after (, stmt, GSI_SAME_STMT);
 	  gsi_remove (, true);
-
-	  /* A vuse in single_succ (exit_bb) may use a vdef from the region
-	 which is about to be split off.  Mark the vdef for renaming.  */
-	  release_first_vuse_in_edge_dest (single_succ_edge (exit_bb));
 	}
 
   /* Move the offloading region into CHILD_CFUN.  */
 
-  if (gimple_in_ssa_p (cfun))
-	{
-	  init_tree_ssa (child_cfun);
-	  init_ssa_operands (child_cfun);
-	  child_cfun->gimple_df->in_ssa_p = true;
-	  block = NULL_TREE;
-	}
-  else
-	block = gimple_block (entry_stmt);
+  block = gimple_block (entry_stmt);
 
   new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
   if (exit_bb)
@@ -12937,8 +12877,6 @@ expand_omp_target (struct omp_region *region)
 	  if (changed)
 	cleanup_tree_cfg ();
 	}
-  if (gimple_in_ssa_p (cfun))
-	update_ssa (TODO_update_ssa);
   if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
 	verify_loop_structure ();
   pop_cfun ();
@@ -13257,8 +13195,6 @@ expand_omp_target (struct omp_region *region)
   gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
   gsi_remove (, true);
 }
-  if (gimple_in_ssa_p (cfun))
-update_ssa (TODO_update_ssa_only_virtuals);
 }
 
 /* Expand the parallel region tree rooted at REGION.  Expansion
-- 
1.9.1



[gomp4, committed] Cleanup formatting of pass_expand_omp_ssa::execute

2015-11-09 Thread Tom de Vries

Hi,

this patch makes the formatting of pass_expand_omp_ssa::execute 
identical to the one in trunk.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Cleanup formatting of pass_expand_omp_ssa::execute

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (pass_expand_omp_ssa::execute): Cleanup formatting.
---
 gcc/omp-low.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 57ac2aa..b78a8d6 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -13657,10 +13657,7 @@ public:
 {
   return !(fun->curr_properties & PROP_gimple_eomp);
 }
-  virtual unsigned int execute (function *)
-{
-  return execute_expand_omp ();
-}
+  virtual unsigned int execute (function *) { return execute_expand_omp (); }
   opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
 
 }; // class pass_expand_omp_ssa
-- 
1.9.1



[PATCH] Allow more pointer-plus folding

2015-10-30 Thread Tom de Vries
[ was: Re: [PATCH] Don't handle CAST_RESTRICT (PR 
tree-optimization/49279)  ]


On 29/10/15 12:38, Richard Biener wrote:

On Thu, Oct 29, 2015 at 11:38 AM, Tom de Vries <tom_devr...@mentor.com> wrote:

[ quote-pasted from https://gcc.gnu.org/ml/gcc-patches/2011-10/msg00464.html
]


CAST_RESTRICT based disambiguation unfortunately isn't reliable,
e.g. to store a non-restrict pointer into a restricted field,
we add a non-useless cast to restricted pointer in the gimplifier,
and while we don't consider that field to have a special restrict tag
because it is unsafe to do so, we unfortunately create it for the
CAST_RESTRICT before that and end up with different restrict tags
for the same thing.  See the PR for more details.

This patch turns off CAST_RESTRICT handling for now, in the future
we might try to replace it by explicit CAST_RESTRICT stmts in some form,
but need to solve problems with multiple inlined copies of the same
function
with restrict arguments or restrict variables in it and intermixed code
from
them (or similarly code from different non-overlapping source blocks).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
4.6 too?

2011-10-06  Jakub Jelinek  <ja...@redhat.com>

 PR tree-optimization/49279
 * tree-ssa-structalias.c (find_func_aliases): Don't handle
 CAST_RESTRICT.
 * tree-ssa-forwprop.c (forward_propagate_addr_expr_1): Allow
 restrict propagation.
 * tree-ssa.c (useless_type_conversion_p): Don't return false
 if TYPE_RESTRICT differs.

 * gcc.dg/tree-ssa/restrict-4.c: XFAIL.
 * gcc.c-torture/execute/pr49279.c: New test.



Hi,

In the patch adding support for CAST_RESTRICT (
https://gcc.gnu.org/ml/gcc-patches/2011-10/msg00176.html ) there was also a
bit:
...
 * fold-const.c (fold_unary_loc): Don't optimize
 POINTER_PLUS_EXPR casted to TYPE_RESTRICT pointer by
 casting the inner pointer if it isn't TYPE_RESTRICT.
...
which is still around. I suppose we can remove this bit as well.

OK for trunk if bootstrap and reg-test succeed?


Ok.


Committed.


I think the checks on TREE_OPERAND (arg0, 1) are bogus though
and either we should unconditionally sink the conversion or only
if a conversion on TREE_OPERAND (arg0, 0) vanishes (I prefer the
latter).



Like this? OK for trunk if bootstrap/reg-test succeeds?

Thanks,
- Tom

Allow more pointer-plus folding

2015-10-30  Tom de Vries  <t...@codesourcery.com>

	* fold-const.c (fold_unary_loc): Allow more POINTER_PLUS_EXPR folding.
---
 gcc/fold-const.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 47ed609..6763e80 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -7770,9 +7770,7 @@ fold_unary_loc (location_t loc, enum tree_code code, tree type, tree op0)
 	 that this happens when X or Y is NOP_EXPR or Y is INTEGER_CST. */
   if (POINTER_TYPE_P (type)
 	  && TREE_CODE (arg0) == POINTER_PLUS_EXPR
-	  && (TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST
-	  || TREE_CODE (TREE_OPERAND (arg0, 0)) == NOP_EXPR
-	  || TREE_CODE (TREE_OPERAND (arg0, 1)) == NOP_EXPR))
+	  && TREE_CODE (TREE_OPERAND (arg0, 0)) == NOP_EXPR)
 	{
 	  tree arg00 = TREE_OPERAND (arg0, 0);
 	  tree arg01 = TREE_OPERAND (arg0, 1);
-- 
1.9.1



[gomp4, committed] Remove shadowing declaration in oacc_entry_exit_ok_1

2015-11-03 Thread Tom de Vries
[ was: Re: [committed, gomp4, 2/3] Handle sequential code in kernels 
region ]


On 12/10/15 19:26, Tom de Vries wrote:

On 12/10/15 19:12, Tom de Vries wrote:

Hi,

I've committed the following patch series.

  1Add get_bbs_in_oacc_kernels_region
  2Handle sequential code in kernels region
  3Handle sequential code in kernels region - Testcases

The patch series adds detection of whether sequential code (that is,
code in the oacc kernels region before and after the loop that is to be
parallelized), is safe to execute in parallel.

Bootstrapped and reg-tested on x86_64.

I'll post the patches individually, in reply to this email.


This patch checks in parloops, for each non-loop stmt in the oacc
kernels region, that it's not a load aliasing with a store anywhere in
the region, and vice versa.

Loads and stores for reductions are an exception: they are later
transformed into an atomic update.



I ran into an ICE in oacc kernels testcases when doing a non-bootstrap 
build and test. The ICE was caused by an uninitialized variable, which 
was uninitialized because the intended initialization was absorbed by a 
shadowing variable declaration.


This patch removes the shadowing declaration.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Remove shadowing declaration in oacc_entry_exit_ok_1

2015-11-03  Tom de Vries  <t...@codesourcery.com>

	* tree-parloops.c (oacc_entry_exit_ok_1): Remove shadowing declaration
	of ref.
---
 gcc/tree-parloops.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index a144f2d..f14cf8a 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2976,7 +2976,6 @@ oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec region_bbs,
 	}
 	  else if (gimple_store_p (stmt))
 	{
-	  ao_ref ref;
 	  ao_ref_init (, gimple_assign_lhs (stmt));
 	  ref_is_store = true;
 	}
-- 
1.9.1



[gomp4, committed] Backport make_restrict_var_constraints fixes from trunk

2015-11-03 Thread Tom de Vries

Hi,

I've ported two recent trunk commits that touch make_restrict_var_constraints
in tree-ssa-structalias.c to gomp-4_0-branch.


Committed as attached to gomp-4_0-branch.

Thanks,
- Tom
Backport make_restrict_var_constraints fixes from trunk

2015-11-03  Tom de Vries  <t...@codesourcery.com>

	backport from trunk:
	2015-11-03  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (make_restrict_var_constraints): Rename to ...
	(make_param_constraints): ... this.  Add and handle restrict_name
	parameter.  Handle is_full_var case.
	(intra_create_variable_infos): Use make_param_constraints.

	* tree-ssa-structalias.c (make_restrict_var_constraints): Replace
	make_copy_constraint call with make_constraint_from call.
---
 gcc/ChangeLog  | 12 
 gcc/tree-ssa-structalias.c | 33 ++---
 2 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 70895a6..b28bf54 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2015-11-03  Tom de Vries  <t...@codesourcery.com>
+
+	* tree-ssa-structalias.c (make_restrict_var_constraints): Rename to ...
+	(make_param_constraints): ... this.  Add and handle restrict_name
+	parameter.  Handle is_full_var case.
+	(intra_create_variable_infos): Use make_param_constraints.
+
+2015-11-03  Tom de Vries  <t...@codesourcery.com>
+
+	* tree-ssa-structalias.c (make_restrict_var_constraints): Replace
+	make_copy_constraint call with make_constraint_from call.
+
 2015-08-29  Anatoly Sokolov  <ae...@post.ru>
 
 	* config/mcore/mcore.h (REG_OK_FOR_BASE_P, REG_OK_FOR_INDEX_P,
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index d409727..f4c875f 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5892,19 +5892,22 @@ debug_solution_for_var (unsigned int var)
   dump_solution_for_var (stderr, var);
 }
 
-/* Register the constraints for restrict var VI.  */
+/* Register the constraints for function parameter related VI.  Use RESTRICT_NAME
+   as the base name of created restrict vars.  */
 
 static void
-make_restrict_var_constraints (varinfo_t vi)
+make_param_constraints (varinfo_t vi, const char *restrict_name)
 {
   for (; vi; vi = vi_next (vi))
-if (vi->may_have_pointers)
-  {
-	if (vi->only_restrict_pointers)
-	  make_constraint_from_global_restrict (vi, "GLOBAL_RESTRICT", true);
-	else
-	  make_copy_constraint (vi, nonlocal_id);
-  }
+{
+  if (vi->only_restrict_pointers)
+	make_constraint_from_global_restrict (vi, restrict_name, true);
+  else if (vi->may_have_pointers)
+	make_constraint_from (vi, nonlocal_id);
+
+  if (vi->is_full_var)
+	break;
+}
 }
 
 /* Create varinfo structures for all of the variables in the
@@ -5941,19 +5944,11 @@ intra_create_variable_infos (struct function *fn)
 	  vi->is_restrict_var = 1;
 	  insert_vi_for_tree (heapvar, vi);
 	  make_constraint_from (p, vi->id);
-	  make_restrict_var_constraints (vi);
+	  make_param_constraints (vi, "GLOBAL_RESTRICT");
 	  continue;
 	}
 
-  for (; p; p = vi_next (p))
-	{
-	  if (p->only_restrict_pointers)
-	make_constraint_from_global_restrict (p, "PARM_RESTRICT", true);
-	  else if (p->may_have_pointers)
-	make_constraint_from (p, nonlocal_id);
-	  if (p->is_full_var)
-	break;
-	}
+  make_param_constraints (p, "PARM_RESTRICT");
 }
 
   /* Add a constraint for a result decl that is passed by reference.  */
-- 
1.9.1



[PATCH, 2/2] Handle recursive restrict in function parameter

2015-11-01 Thread Tom de Vries

On 01/11/15 19:03, Tom de Vries wrote:

So, the new patch series is:

  1Rename make_restrict_var_constraints to make_param_constraints
  2Handle recursive restrict in function parameter

I'll repost in reply to this message.



This patch adds handling of all the restrict qualifiers in the type of a 
function parameter.


Thanks,
- Tom

Handle recursive restrict in function parameter

	* tree-ssa-structalias.c (struct fieldoff): Add restrict_var field.
	(push_fields_onto_fieldstack): Add and handle handle_param parameter.
	(create_variable_info_for_1): Add and handle
	handle_param parameter.  Add extra arg to call to
	push_fields_onto_fieldstack.  Handle restrict pointer fields.
	(create_variable_info_for): Call create_variable_info_for_1 with extra
	arg.
	(make_param_constraints): Drop restrict_name parameter.  Ignore
	vi->only_restrict_pointers.
	(intra_create_variable_infos): Call create_variable_info_for_1 with
	extra arg.  Remove restrict handling.  Call make_param_constraints with
	one less arg.

	* gcc.dg/tree-ssa/restrict-7.c: New test.
	* gcc.dg/tree-ssa/restrict-8.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c | 12 +
 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c | 17 ++
 gcc/tree-ssa-structalias.c | 87 ++
 3 files changed, 82 insertions(+), 34 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
new file mode 100644
index 000..f7a68c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+int
+f (int *__restrict__ *__restrict__ *__restrict__ a, int *b)
+{
+  *b = 1;
+  ***a  = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
new file mode 100644
index 000..b0ab164
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+struct s
+{
+  int *__restrict__ *__restrict__ pp;
+};
+
+int
+f (struct s s, int *b)
+{
+  *b = 1;
+  **s.pp = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index ea34764..f4e9b0a 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -320,6 +320,7 @@ static varinfo_t first_or_preceding_vi_for_offset (varinfo_t,
 		   unsigned HOST_WIDE_INT);
 static varinfo_t lookup_vi_for_tree (tree);
 static inline bool type_can_have_subvars (const_tree);
+static void make_param_constraints (varinfo_t, bool);
 
 /* Pool of variable info structures.  */
 static object_allocator variable_info_pool
@@ -406,6 +407,7 @@ new_var_info (tree t, const char *name, bool add_id)
   return ret;
 }
 
+static varinfo_t create_variable_info_for_1 (tree, const char *, bool, bool);
 
 /* A map mapping call statements to per-stmt variables for uses
and clobbers specific to the call.  */
@@ -5208,6 +5210,8 @@ struct fieldoff
   unsigned may_have_pointers : 1;
 
   unsigned only_restrict_pointers : 1;
+
+  varinfo_t restrict_var;
 };
 typedef struct fieldoff fieldoff_s;
 
@@ -5302,11 +5306,12 @@ field_must_have_pointers (tree t)
OFFSET is used to keep track of the offset in this entire
structure, rather than just the immediately containing structure.
Returns false if the caller is supposed to handle the field we
-   recursed for.  */
+   recursed for.  If HANDLE_PARAM is set, we're handling part of a function
+   parameter.  */
 
 static bool
 push_fields_onto_fieldstack (tree type, vec *fieldstack,
-			 HOST_WIDE_INT offset)
+			 HOST_WIDE_INT offset, bool handle_param)
 {
   tree field;
   bool empty_p = true;
@@ -5332,7 +5337,7 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 	|| TREE_CODE (field_type) == UNION_TYPE)
 	  push = true;
 	else if (!push_fields_onto_fieldstack
-		(field_type, fieldstack, offset + foff)
+		(field_type, fieldstack, offset + foff, handle_param)
 		 && (DECL_SIZE (field)
 		 && !integer_zerop (DECL_SIZE (field
 	  /* Empty structures may have actual size, like in C++.  So
@@ -5353,7 +5358,8 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 	if (!pair
 		&& offset + foff != 0)
 	  {
-		fieldoff_s e = {0, offset + foff, false, false, false, false};
+		fieldoff_s e = {0, offset + foff, false, false, false, false,
+NULL};
 		pair = fieldstack->safe_push (e);
 	  }
 
@@ -5387,6 +5393,19 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 		  = (!has_unknown_size
 		 

Re: [PATCH, 3/6] Add recursion to make_param_constraints

2015-11-01 Thread Tom de Vries

On 01/11/15 19:03, Tom de Vries wrote:


So, the new patch series is:

  1Rename make_restrict_var_constraints to make_param_constraints
  2Handle recursive restrict in function parameter

I'll repost in reply to this message.


This no-functional-changes patch:
- moves the one constraint handling loop left in
   intra_create_variable_infos to make_restrict_var_constraints
- renames make_restrict_var_constraints to make_param_constraints
- adds a parameter toplevel to make_param_constraints to distinguish
   between the two calling contexts
- adds a parameter restrict_name that allows passing in the name of
  restrict vars

This patch was posted before at 
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg03111.html .


Thanks,
- Tom

Rename make_restrict_var_constraints to make_param_constraints

2015-10-27  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (make_restrict_var_constraints): Rename to ...
	(make_param_constraints): ... this.  Add toplevel and restrict_name
	parameter.
	(intra_create_variable_infos): Use make_param_constraints.
---
 gcc/tree-ssa-structalias.c | 39 ---
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index d409727..ea34764 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5892,19 +5892,28 @@ debug_solution_for_var (unsigned int var)
   dump_solution_for_var (stderr, var);
 }
 
-/* Register the constraints for restrict var VI.  */
+/* Register the constraints for VI.  If TOPLEVEL then VI is a function
+   parameter, otherwise VI is part of a function parameter.  Use RESTRICT_NAME
+   as the base name of created restrict vars.  */
 
 static void
-make_restrict_var_constraints (varinfo_t vi)
+make_param_constraints (varinfo_t vi, bool toplevel, const char *restrict_name)
 {
   for (; vi; vi = vi_next (vi))
-if (vi->may_have_pointers)
-  {
-	if (vi->only_restrict_pointers)
-	  make_constraint_from_global_restrict (vi, "GLOBAL_RESTRICT", true);
-	else
-	  make_copy_constraint (vi, nonlocal_id);
-  }
+{
+  if (vi->only_restrict_pointers)
+	make_constraint_from_global_restrict (vi, restrict_name, true);
+  else if (vi->may_have_pointers)
+	{
+	  if (toplevel)
+	make_constraint_from (vi, nonlocal_id);
+	  else
+	make_copy_constraint (vi, nonlocal_id);
+	}
+
+if (vi->is_full_var)
+  break;
+}
 }
 
 /* Create varinfo structures for all of the variables in the
@@ -5941,19 +5950,11 @@ intra_create_variable_infos (struct function *fn)
 	  vi->is_restrict_var = 1;
 	  insert_vi_for_tree (heapvar, vi);
 	  make_constraint_from (p, vi->id);
-	  make_restrict_var_constraints (vi);
+	  make_param_constraints (vi, false, "GLOBAL_RESTRICT");
 	  continue;
 	}
 
-  for (; p; p = vi_next (p))
-	{
-	  if (p->only_restrict_pointers)
-	make_constraint_from_global_restrict (p, "PARM_RESTRICT", true);
-	  else if (p->may_have_pointers)
-	make_constraint_from (p, nonlocal_id);
-	  if (p->is_full_var)
-	break;
-	}
+  make_param_constraints (p, true, "PARM_RESTRICT");
 }
 
   /* Add a constraint for a result decl that is passed by reference.  */
-- 
1.9.1



Re: [PATCH, 3/6] Add recursion to make_param_constraints

2015-11-01 Thread Tom de Vries

On 27/10/15 13:56, Tom de Vries wrote:

On 27/10/15 13:24, Tom de Vries wrote:

Thinking it over a bit more, I realized the constraint handling started
to be messy. I've reworked the patch series to simplify that first.

  1Simplify constraint handling
  2Rename make_restrict_var_constraints to make_param_constraints
  3Add recursion to make_param_constraints
  4Add handle_param parameter to create_variable_info_for_1
  5Handle recursive restrict pointer in
create_variable_info_for_1
  6Handle restrict struct fields recursively

Currently doing bootstrap and regtest on x86_64.

I'll repost the patch series in reply to this message.



This patch:
- registers the connection between a restrict pointer var and a
   restrict var in a new hash_map restrict_pointed_var.
- move the restrict pointer constraint handling from
   intra_create_variable_infos to make_param_constraints

The result of this and the two preceding patches is that the constraint
handling for params in intra_create_variable_infos is reduced to a
single call to make_param_constraints.


I've managed to eliminate this patch from the patch series, at the cost 
of having to merge patches 4-6 into a single patch, rather than having a 
more stepwise approach.


So, the new patch series is:

 1  Rename make_restrict_var_constraints to make_param_constraints
 2  Handle recursive restrict in function parameter

I'll repost in reply to this message.

Thanks,
- Tom


[gomp4, committed] Backport tree-ssa-structalias.c fixes from trunk

2015-10-30 Thread Tom de Vries

Hi,

this patch backports the commits I made to tree-ssa-structalias.c on trunk
this week.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Backport tree-ssa-structalias.c fixes from trunk

2015-10-30  Tom de Vries  <t...@codesourcery.com>

	backport from trunk:
	2015-10-30  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (ipa_pta_execute): Declare variable from as
	unsigned, and initialize, and use initial value instead of hardcoded
	constant.  Add generic constraints dumping section.  Don't dump global
	initializers constraints dumping section if empty.  Don't update
	variable from if unused.

	2015-10-28  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (intra_create_variable_infos): Remove
	superfluous code.

	* tree-ssa-structalias.c (intra_create_variable_infos): Don't iterate
	into vi_next of a full_var.

	* tree-ssa-structalias.c (new_var_info, make_heapvar)
	(make_constraint_from_restrict, make_constraint_from_global_restrict)
	(create_function_info_for, create_variable_info_for_1)
	(create_variable_info_for): Add and handle add_id parameter.
	(get_call_vi, new_scalar_tmp_constraint_exp, handle_rhs_call)
	(init_base_vars): Add extra argument to calls to new_var_info.
	(get_vi_for_tree): Add extra argument to call to
	create_variable_info_for.
	(process_constraint, do_deref, process_all_all_constraints): Add extra
	argument to calls to new_scalar_tmp_constraint_exp.
	(handle_lhs_call, find_func_aliases_for_builtin_call): Add extra
	argument to calls to make_heapvar.
	(make_restrict_var_constraints): Add extra argument to call to
	make_constraint_from_global_restrict.
	(intra_create_variable_infos): Add extra argument to call to
	create_variable_info_for_1.
	(ipa_pta_execute): Add extra argument to call to
	create_function_info_for.

	* gcc.dg/tree-ssa/pta-callused.c: Update to scan for CALLUSED(id).

	2015-10-27  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (push_fields_onto_fieldstack): Add and use var
	field_type.

	2015-10-26  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (make_restrict_var_constraints): New function,
	factored out of ...
	(intra_create_variable_infos): ... here.

	* tree-ssa-structalias.c (intra_create_variable_infos): Add
	restrict_pointer_p and recursive_restrict_p variables.

	* tree-ssa-structalias.c (intra_create_variable_infos): Inline
	get_vi_for_tree call.

	2015-10-23  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (intra_create_variable_infos): Use
	make_constraint_from.

	* tree-ssa-structalias.c (create_variable_info_for_1): Add missing
	setting of is_full_var in case of a single field.
---
 gcc/ChangeLog.gomp   |  65 +
 gcc/testsuite/ChangeLog.gomp |   7 +
 gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c |   2 +-
 gcc/tree-ssa-structalias.c   | 203 +++
 4 files changed, 189 insertions(+), 88 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c b/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c
index 59408fa..b9a57d8 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c
@@ -22,5 +22,5 @@ int bar (int b)
   return *foo ();
 }
 
-/* { dg-final { scan-tree-dump "CALLUSED = { ESCAPED NONLOCAL f.* i q }" "alias" } } */
+/* { dg-final { scan-tree-dump "CALLUSED\\(\[0-9\]+\\) = { ESCAPED NONLOCAL f.* i q }" "alias" } } */
 
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 8d86dcb..f5e17a3 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -220,7 +220,7 @@ static bitmap_obstack oldpta_obstack;
 /* Used for per-solver-iteration bitmaps.  */
 static bitmap_obstack iteration_obstack;
 
-static unsigned int create_variable_info_for (tree, const char *);
+static unsigned int create_variable_info_for (tree, const char *, bool);
 typedef struct constraint_graph *constraint_graph_t;
 static void unify_nodes (constraint_graph_t, unsigned int, unsigned int, bool);
 
@@ -361,11 +361,18 @@ enum { nothing_id = 1, anything_id = 2, string_id = 3,
to the vector of variable info structures.  */
 
 static varinfo_t
-new_var_info (tree t, const char *name)
+new_var_info (tree t, const char *name, bool add_id)
 {
   unsigned index = varmap.length ();
   varinfo_t ret = variable_info_pool.allocate ();
 
+  if (dump_file && add_id)
+{
+  char *tempname = xasprintf ("%s(%d)", name, index);
+  name = ggc_strdup (tempname);
+  free (tempname);
+}
+
   ret->id = index;
   ret->name = name;
   ret->decl = t;
@@ -416,13 +423,13 @@ get_call_vi (gcall *call)
   if (existed)
 return *slot_p;
 
-  vi = new_var_info (NULL_TREE, "CALLUSED");
+  vi = new_var_info (NULL_TREE, "CALLUSED", true);
   vi->offset = 0;
   vi->size = 1;
   vi->fullsize = 2;
   vi->is_

[gomp4, committed] Backport more tree-ssa-structalias.c fixes from trunk

2015-10-31 Thread Tom de Vries

Hi,

this patch backports the commits I made to tree-ssa-structalias.c on trunk today.

Committed to gomp-4_0-branch.

Thanks,
- Tom
2015-10-31  Tom de Vries  <t...@codesourcery.com>

	backport from trunk:
	2015-10-31  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (intra_create_variable_infos): Don't expect
	existing varinfo for arguments.

	* tree-ssa-structalias.c (ipa_pta_execute): Add extra arg to call to
	create_function_info_for.  Dump constraints generated during
	create_function_info_for. Move intra_create_variable_infos call and
	function-return-values-escape bit to ...
	(create_function_info_for): ... here, and merge
	intra_create_variable_infos call with argument loop.  Add and handle
	nonlocal_p parameter.

	* tree-ssa-structalias.c (create_function_info_for): Make sure prev_vi
	updating is alap, and separated from preceding code.  Make sure
	insert_vi_for_tree is separated from surrounding code.

	* tree-ssa-structalias.c (ipa_pta_execute): Use make_copy_constraint.
---
 gcc/tree-ssa-structalias.c | 121 +++--
 1 file changed, 72 insertions(+), 49 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 55608ef..d409727 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5422,10 +5422,12 @@ count_num_arguments (tree decl, bool *is_varargs)
 }
 
 /* Creation function node for DECL, using NAME, and return the index
-   of the variable we've created for the function.  */
+   of the variable we've created for the function.  If NONLOCAL_p, create
+   initial constraints.  */
 
 static varinfo_t
-create_function_info_for (tree decl, const char *name, bool add_id)
+create_function_info_for (tree decl, const char *name, bool add_id,
+			  bool nonlocal_p)
 {
   struct function *fn = DECL_STRUCT_FUNCTION (decl);
   varinfo_t vi, prev_vi;
@@ -5465,6 +5467,7 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   clobbervi->fullsize = vi->fullsize;
   clobbervi->is_full_var = true;
   clobbervi->is_global_var = false;
+
   gcc_assert (prev_vi->offset < clobbervi->offset);
   prev_vi->next = clobbervi->id;
   prev_vi = clobbervi;
@@ -5479,6 +5482,7 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   usevi->fullsize = vi->fullsize;
   usevi->is_full_var = true;
   usevi->is_global_var = false;
+
   gcc_assert (prev_vi->offset < usevi->offset);
   prev_vi->next = usevi->id;
   prev_vi = usevi;
@@ -5501,10 +5505,16 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   chainvi->fullsize = vi->fullsize;
   chainvi->is_full_var = true;
   chainvi->is_global_var = false;
+
+  insert_vi_for_tree (fn->static_chain_decl, chainvi);
+
+  if (nonlocal_p
+	  && chainvi->may_have_pointers)
+	make_constraint_from (chainvi, nonlocal_id);
+
   gcc_assert (prev_vi->offset < chainvi->offset);
   prev_vi->next = chainvi->id;
   prev_vi = chainvi;
-  insert_vi_for_tree (fn->static_chain_decl, chainvi);
 }
 
   /* Create a variable for the return var.  */
@@ -5530,11 +5540,25 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   resultvi->is_full_var = true;
   if (DECL_RESULT (decl))
 	resultvi->may_have_pointers = true;
+
+  if (DECL_RESULT (decl))
+	insert_vi_for_tree (DECL_RESULT (decl), resultvi);
+
   gcc_assert (prev_vi->offset < resultvi->offset);
   prev_vi->next = resultvi->id;
   prev_vi = resultvi;
-  if (DECL_RESULT (decl))
-	insert_vi_for_tree (DECL_RESULT (decl), resultvi);
+}
+
+  /* We also need to make function return values escape.  Nothing
+ escapes by returning from main though.  */
+  if (nonlocal_p
+  && !MAIN_NAME_P (DECL_NAME (decl)))
+{
+  varinfo_t fi, rvi;
+  fi = lookup_vi_for_tree (decl);
+  rvi = first_vi_for_offset (fi, fi_result);
+  if (rvi && rvi->offset == fi_result)
+	make_copy_constraint (get_varinfo (escaped_id), rvi->id);
 }
 
   /* Set up variables for each argument.  */
@@ -5560,14 +5584,19 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   argvi->fullsize = vi->fullsize;
   if (arg)
 	argvi->may_have_pointers = true;
+
+  if (arg)
+	insert_vi_for_tree (arg, argvi);
+
+  if (nonlocal_p
+	  && argvi->may_have_pointers)
+	make_constraint_from (argvi, nonlocal_id);
+
   gcc_assert (prev_vi->offset < argvi->offset);
   prev_vi->next = argvi->id;
   prev_vi = argvi;
   if (arg)
-	{
-	  insert_vi_for_tree (arg, argvi);
-	  arg = DECL_CHAIN (arg);
-	}
+	arg = DECL_CHAIN (arg);
 }
 
   /* Add one representative for all further args.  */
@@ -5591,6 +5620,11 @@ create_function_info_for (tree decl, const

[committed, trivial] Improve readability and structure of create_function_info_for

2015-10-31 Thread Tom de Vries

Hi,

this patch improves readability and structure of function 
create_function_info_for.


Bootstrapped and reg-tested on x86_64.

Committed to trunk as trivial.

Thanks,
- Tom
Improve readability and structure of create_function_info_for

2015-10-30  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (create_function_info_for): Make sure prev_vi
	updating is alap, and separated from preceding code.  Make sure
	insert_vi_for_tree is separated from surrounding code.
---
 gcc/tree-ssa-structalias.c | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index f142ca0..5195eb39 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5465,6 +5465,7 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   clobbervi->fullsize = vi->fullsize;
   clobbervi->is_full_var = true;
   clobbervi->is_global_var = false;
+
   gcc_assert (prev_vi->offset < clobbervi->offset);
   prev_vi->next = clobbervi->id;
   prev_vi = clobbervi;
@@ -5479,6 +5480,7 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   usevi->fullsize = vi->fullsize;
   usevi->is_full_var = true;
   usevi->is_global_var = false;
+
   gcc_assert (prev_vi->offset < usevi->offset);
   prev_vi->next = usevi->id;
   prev_vi = usevi;
@@ -5501,10 +5503,12 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   chainvi->fullsize = vi->fullsize;
   chainvi->is_full_var = true;
   chainvi->is_global_var = false;
+
+  insert_vi_for_tree (fn->static_chain_decl, chainvi);
+
   gcc_assert (prev_vi->offset < chainvi->offset);
   prev_vi->next = chainvi->id;
   prev_vi = chainvi;
-  insert_vi_for_tree (fn->static_chain_decl, chainvi);
 }
 
   /* Create a variable for the return var.  */
@@ -5530,11 +5534,13 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   resultvi->is_full_var = true;
   if (DECL_RESULT (decl))
 	resultvi->may_have_pointers = true;
+
+  if (DECL_RESULT (decl))
+	insert_vi_for_tree (DECL_RESULT (decl), resultvi);
+
   gcc_assert (prev_vi->offset < resultvi->offset);
   prev_vi->next = resultvi->id;
   prev_vi = resultvi;
-  if (DECL_RESULT (decl))
-	insert_vi_for_tree (DECL_RESULT (decl), resultvi);
 }
 
   /* Set up variables for each argument.  */
@@ -5560,14 +5566,15 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   argvi->fullsize = vi->fullsize;
   if (arg)
 	argvi->may_have_pointers = true;
+
+  if (arg)
+	insert_vi_for_tree (arg, argvi);
+
   gcc_assert (prev_vi->offset < argvi->offset);
   prev_vi->next = argvi->id;
   prev_vi = argvi;
   if (arg)
-	{
-	  insert_vi_for_tree (arg, argvi);
-	  arg = DECL_CHAIN (arg);
-	}
+	arg = DECL_CHAIN (arg);
 }
 
   /* Add one representative for all further args.  */
@@ -5591,6 +5598,7 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   argvi->is_full_var = true;
   argvi->is_heap_var = true;
   argvi->fullsize = vi->fullsize;
+
   gcc_assert (prev_vi->offset < argvi->offset);
   prev_vi->next = argvi->id;
   prev_vi = argvi;
-- 
1.9.1



[committed, trivial] Don't expect existing varinfo for arguments in intra_create_variable_infos

2015-10-31 Thread Tom de Vries

[ was: Re: [PATCH, 1/6] Simplify constraint handling ]

On 31/10/15 09:19, Tom de Vries wrote:

On 30/10/15 10:33, Richard Biener wrote:

Yes, but as I said we should refactor things to avoid calling
> >the intra constraints generation from the IPA path.

>
>Ah, I see.
>
>So, like this? OK for trunk if bootstrap and reg-test succeeds?

Yes, like this.  But you miss to apply the same to the static chain,
and the varargs "rest".

Ok with that change.


Updated patch, bootstrapped and reg-tested on x86_64.

Committed to trunk as attached.



And now that we don't call intra_create_variable_infos anymore during 
ipa_pta_execute, we can simplify intra_create_variable_infos a bit.


Bootstrapped and reg-tested on x86_64.

Committed to trunk as trivial.

Thanks,
- Tom

Don't expect existing varinfo for arguments in intra_create_variable_infos

2015-10-30  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (intra_create_variable_infos): Don't expect
	existing varinfo for arguments.
---
 gcc/tree-ssa-structalias.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index b361096..d409727 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5925,12 +5925,8 @@ intra_create_variable_infos (struct function *fn)
   bool recursive_restrict_p
 	= (restrict_pointer_p
 	   && !type_contains_placeholder_p (TREE_TYPE (TREE_TYPE (t;
-  varinfo_t p = lookup_vi_for_tree (t);
-  if (p == NULL)
-	{
-	  p = create_variable_info_for_1 (t, alias_get_name (t), false);
-	  insert_vi_for_tree (t, p);
-	}
+  varinfo_t p = create_variable_info_for_1 (t, alias_get_name (t), false);
+  insert_vi_for_tree (t, p);
 
   /* For restrict qualified pointers build a representative for
 	 the pointed-to object.  Note that this ends up handling
-- 
1.9.1



Re: [PATCH, 1/6] Simplify constraint handling

2015-10-31 Thread Tom de Vries

On 30/10/15 10:33, Richard Biener wrote:

Yes, but as I said we should refactor things to avoid calling
> >the intra constraints generation from the IPA path.

>
>Ah, I see.
>
>So, like this? OK for trunk if bootstrap and reg-test succeeds?

Yes, like this.  But you miss to apply the same to the static chain,
and the varargs "rest".

Ok with that change.


Updated patch, bootstrapped and reg-tested on x86_64.

Committed to trunk as attached.

Thanks,
- Tom
Add initial constraints in create_function_info_for

2015-10-29  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (ipa_pta_execute): Add extra arg to call to
	create_function_info_for.  Dump constraints generated during
	create_function_info_for. Move intra_create_variable_infos call and
	function-return-values-escape bit to ...
	(create_function_info_for): ... here, and merge
	intra_create_variable_infos call with argument loop.  Add and handle
	nonlocal_p parameter.
---
 gcc/tree-ssa-structalias.c | 81 +++---
 1 file changed, 55 insertions(+), 26 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 5195eb39..b361096 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5422,10 +5422,12 @@ count_num_arguments (tree decl, bool *is_varargs)
 }
 
 /* Creation function node for DECL, using NAME, and return the index
-   of the variable we've created for the function.  */
+   of the variable we've created for the function.  If NONLOCAL_p, create
+   initial constraints.  */
 
 static varinfo_t
-create_function_info_for (tree decl, const char *name, bool add_id)
+create_function_info_for (tree decl, const char *name, bool add_id,
+			  bool nonlocal_p)
 {
   struct function *fn = DECL_STRUCT_FUNCTION (decl);
   varinfo_t vi, prev_vi;
@@ -5506,6 +5508,10 @@ create_function_info_for (tree decl, const char *name, bool add_id)
 
   insert_vi_for_tree (fn->static_chain_decl, chainvi);
 
+  if (nonlocal_p
+	  && chainvi->may_have_pointers)
+	make_constraint_from (chainvi, nonlocal_id);
+
   gcc_assert (prev_vi->offset < chainvi->offset);
   prev_vi->next = chainvi->id;
   prev_vi = chainvi;
@@ -5543,6 +5549,18 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   prev_vi = resultvi;
 }
 
+  /* We also need to make function return values escape.  Nothing
+ escapes by returning from main though.  */
+  if (nonlocal_p
+  && !MAIN_NAME_P (DECL_NAME (decl)))
+{
+  varinfo_t fi, rvi;
+  fi = lookup_vi_for_tree (decl);
+  rvi = first_vi_for_offset (fi, fi_result);
+  if (rvi && rvi->offset == fi_result)
+	make_copy_constraint (get_varinfo (escaped_id), rvi->id);
+}
+
   /* Set up variables for each argument.  */
   arg = DECL_ARGUMENTS (decl);
   for (i = 0; i < num_args; i++)
@@ -5570,6 +5588,10 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   if (arg)
 	insert_vi_for_tree (arg, argvi);
 
+  if (nonlocal_p
+	  && argvi->may_have_pointers)
+	make_constraint_from (argvi, nonlocal_id);
+
   gcc_assert (prev_vi->offset < argvi->offset);
   prev_vi->next = argvi->id;
   prev_vi = argvi;
@@ -5599,6 +5621,10 @@ create_function_info_for (tree decl, const char *name, bool add_id)
   argvi->is_heap_var = true;
   argvi->fullsize = vi->fullsize;
 
+  if (nonlocal_p
+	  && argvi->may_have_pointers)
+	make_constraint_from (argvi, nonlocal_id);
+
   gcc_assert (prev_vi->offset < argvi->offset);
   prev_vi->next = argvi->id;
   prev_vi = argvi;
@@ -7325,8 +7351,34 @@ ipa_pta_execute (void)
 
   gcc_assert (!node->clone_of);
 
+  /* For externally visible or attribute used annotated functions use
+	 local constraints for their arguments.
+	 For local functions we see all callers and thus do not need initial
+	 constraints for parameters.  */
+  bool nonlocal_p = (node->used_from_other_partition
+			 || node->externally_visible
+			 || node->force_output
+			 || node->address_taken);
+
   vi = create_function_info_for (node->decl,
- alias_get_name (node->decl), false);
+ alias_get_name (node->decl), false,
+ nonlocal_p);
+  if (dump_file
+	  && from != constraints.length ())
+	{
+	  fprintf (dump_file,
+		   "Generating intial constraints for %s", node->name ());
+	  if (DECL_ASSEMBLER_NAME_SET_P (node->decl))
+	fprintf (dump_file, " (%s)",
+		 IDENTIFIER_POINTER
+		   (DECL_ASSEMBLER_NAME (node->decl)));
+	  fprintf (dump_file, "\n\n");
+	  dump_constraints (dump_file, from);
+	  fprintf (dump_file, "\n");
+
+	  from = constraints.length ();
+	}
+
   node->call_for_symbol_thunks_and_aliases
 	(associate_varinfo_to_alias, vi, true);

[committed, trivial] Use make_copy_constraint in ipa_pta_execute

2015-10-31 Thread Tom de Vries

Hi,

this patch gives us shorter code in ipa_pta_execute, by using 
make_copy_constraint.


Bootstrapped and reg-tested on x86_64.

Committed to trunk as trivial.

Thanks,
- Tom
Use make_copy_constraint in ipa_pta_execute

2015-10-30  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (ipa_pta_execute): Use make_copy_constraint.
---
 gcc/tree-ssa-structalias.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 55608ef..f142ca0 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -7384,17 +7384,7 @@ ipa_pta_execute (void)
 	  fi = lookup_vi_for_tree (node->decl);
 	  rvi = first_vi_for_offset (fi, fi_result);
 	  if (rvi && rvi->offset == fi_result)
-		{
-		  struct constraint_expr includes;
-		  struct constraint_expr var;
-		  includes.var = escaped_id;
-		  includes.offset = 0;
-		  includes.type = SCALAR;
-		  var.var = rvi->id;
-		  var.offset = 0;
-		  var.type = SCALAR;
-		  process_constraint (new_constraint (includes, var));
-		}
+		make_copy_constraint (get_varinfo (escaped_id), rvi->id);
 	}
 	}
 
-- 
1.9.1



[committed] Tune pointer-plus folding

2015-10-31 Thread Tom de Vries

[ was: Re: [PATCH] Allow more pointer-plus folding ]

On 30/10/15 10:24, Richard Biener wrote:

I think the checks on TREE_OPERAND (arg0, 1) are bogus though
>>and either we should unconditionally sink the conversion or only
>>if a conversion on TREE_OPERAND (arg0, 0) vanishes (I prefer the
>>latter).
>>

>
>Like this? OK for trunk if bootstrap/reg-test succeeds?

Ok with using CONVERT_EXPR_P (TREE_OPERAND (arg0, 0)) instead of
an explicit NOP_EXPR check.


Committed to trunk as attached, with:
- fold-const.c comment updated, and
- two test-cases updated (where we do less folding than before).

And I've changed the title of the commit since although we do allow more 
folding in some cases, we allow less folding in other cases.


Thanks,
- Tom
Tune pointer-plus folding

2015-10-30  Tom de Vries  <t...@codesourcery.com>

	* fold-const.c (fold_unary_loc): Tune POINTER_PLUS_EXPR folding.

	* gfortran.dg/assumed_type_2.f90: Update test.
	* gfortran.dg/no_arg_check_2.f90: Same.
---
 gcc/fold-const.c | 10 --
 gcc/testsuite/gfortran.dg/assumed_type_2.f90 |  2 +-
 gcc/testsuite/gfortran.dg/no_arg_check_2.f90 |  2 +-
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index b9168f3..197ccfd 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -7755,14 +7755,12 @@ fold_unary_loc (location_t loc, enum tree_code code, tree type, tree op0)
 	}
 	}
 
-  /* Convert (T1)(X p+ Y) into ((T1)X p+ Y), for pointer type,
- when one of the new casts will fold away. Conservatively we assume
-	 that this happens when X or Y is NOP_EXPR or Y is INTEGER_CST. */
+  /* Convert (T1)(X p+ Y) into ((T1)X p+ Y), for pointer type, when the new
+	 cast (T1)X will fold away.  We assume that this happens when X itself
+	 is a cast.  */
   if (POINTER_TYPE_P (type)
 	  && TREE_CODE (arg0) == POINTER_PLUS_EXPR
-	  && (TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST
-	  || TREE_CODE (TREE_OPERAND (arg0, 0)) == NOP_EXPR
-	  || TREE_CODE (TREE_OPERAND (arg0, 1)) == NOP_EXPR))
+	  && CONVERT_EXPR_P (TREE_OPERAND (arg0, 0)))
 	{
 	  tree arg00 = TREE_OPERAND (arg0, 0);
 	  tree arg01 = TREE_OPERAND (arg0, 1);
diff --git a/gcc/testsuite/gfortran.dg/assumed_type_2.f90 b/gcc/testsuite/gfortran.dg/assumed_type_2.f90
index ec51b8b..f1a2074 100644
--- a/gcc/testsuite/gfortran.dg/assumed_type_2.f90
+++ b/gcc/testsuite/gfortran.dg/assumed_type_2.f90
@@ -155,7 +155,7 @@ end
 ! { dg-final { scan-tree-dump-times "sub_scalar .&\\(.\\(struct t2.0:. . restrict\\) array_t2_alloc.data" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "sub_scalar .&\\(.\\(struct t3.0:. .\\) array_t3_ptr.data" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "sub_scalar .\\(struct t1 .\\) array_class_t1_alloc._data.data" 1 "original" } }
-! { dg-final { scan-tree-dump-times "sub_scalar .\\(struct t1 .\\) array_class_t1_ptr._data.dat" 1 "original" } }a
+! { dg-final { scan-tree-dump-times "sub_scalar .\\(struct t1 .\\) \\(array_class_t1_ptr._data.dat" 1 "original" } }
 
 ! { dg-final { scan-tree-dump-times "sub_array_assumed \\(D" 3 "original" } }
 ! { dg-final { scan-tree-dump-times " = _gfortran_internal_pack \\(" 1 "original" } }
diff --git a/gcc/testsuite/gfortran.dg/no_arg_check_2.f90 b/gcc/testsuite/gfortran.dg/no_arg_check_2.f90
index 3645ded..b3fb468 100644
--- a/gcc/testsuite/gfortran.dg/no_arg_check_2.f90
+++ b/gcc/testsuite/gfortran.dg/no_arg_check_2.f90
@@ -137,7 +137,7 @@ end
 ! { dg-final { scan-tree-dump-times "sub_scalar .&\\(.\\(struct t2.0:. . restrict\\) array_t2_alloc.data" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "sub_scalar .&\\(.\\(struct t3.0:. .\\) array_t3_ptr.data" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "sub_scalar .\\(struct t1 .\\) array_class_t1_alloc._data.data" 1 "original" } }
-! { dg-final { scan-tree-dump-times "sub_scalar .\\(struct t1 .\\) array_class_t1_ptr._data.dat" 1 "original" } }a
+! { dg-final { scan-tree-dump-times "sub_scalar .\\(struct t1 .\\) \\(array_class_t1_ptr._data.dat" 1 "original" } }
 
 ! { dg-final { scan-tree-dump-times "sub_array_assumed \\(D" 3 "original" } }
 ! { dg-final { scan-tree-dump-times " = _gfortran_internal_pack \\(" 1 "original" } }
-- 
1.9.1



[gomp4, committed, 1/9] Move expansion of kernels region back to first omp-expand

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.



This patch moves expansion of the kernels region back to the first 
omp-expand pass, before ssa.


There's no longer a need for the region-replacing call to expand into 
BUILT_IN_GOACC_KERNELS_INTERNAL and subsequently map onto 
BUILT_IN_GOACC_PARALLEL during the second omp-expand pass.  The 
BUILT_IN_GOACC_KERNELS_INTERNAL call was modeled as transparent to alias 
analysis, and that's no longer needed.


Thanks,
- Tom
Move expansion of kernels region back to first omp-expand

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (expand_omp_target): Remove do_emit_library_call variable,
	assume true.  Remove do_splitoff variable, assume true.  Remove
	BUILT_IN_GOACC_KERNELS_INTERNAL handling.  Replace kernels region with a
	BUILT_IN_GOACC_PARALLEL call.
---
 gcc/omp-low.c | 57 -
 1 file changed, 4 insertions(+), 53 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 6bac074..ac8c8d0 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12563,8 +12563,6 @@ expand_omp_target (struct omp_region *region)
   gimple *stmt;
   edge e;
   bool offloaded, data_region;
-  bool do_emit_library_call = true;
-  bool do_splitoff = true;
 
   entry_stmt = as_a  (last_stmt (region->entry));
 
@@ -12608,43 +12606,7 @@ expand_omp_target (struct omp_region *region)
   exit_bb = region->exit;
 
   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
-{
-  if (!gimple_in_ssa_p (cfun))
-	{
-	  /* We need to do analysis and optimizations on the kernels region
-	 before splitoff.  Since that's hard to do on low gimple, we
-	 postpone the splitoff until we're in SSA.
-	 However, we do the emit of the corresponding function call already,
-	 in order to keep the arguments of the call alive until the
-	 splitoff.
-	 Since at this point the function that is called is empty, we can
-	 model the function as BUILT_IN_GOACC_KERNELS_INTERNAL, which marks
-	 some of it's function arguments as non-escaping, so it acts less
-	 as an optimization barrier.  */
-	  do_splitoff = false;
-	  cfun->curr_properties &= ~PROP_gimple_eomp;
-
-	  mark_loops_in_oacc_kernels_region (region->entry, region->exit);
-	}
-  else
-	{
-	  /* Don't emit the library call.  We've already done that.  */
-	  do_emit_library_call = false;
-	  /* Transform BUILT_IN_GOACC_KERNELS_INTERNAL into
-	 BUILT_IN_GOACC_PARALLELL.  Now that the function
-	 body will be split off, we can no longer regard the
-	 omp_data_array reference as non-escaping.  */
-	  gsi = gsi_last_bb (entry_bb);
-	  gsi_prev ();
-	  gcall *call = as_a  (gsi_stmt (gsi));
-	  gcc_assert (gimple_call_builtin_p
-		  (call, BUILT_IN_GOACC_KERNELS_INTERNAL));
-	  tree fndecl = builtin_decl_explicit (BUILT_IN_GOACC_PARALLEL);
-	  gimple_call_set_fndecl (call, fndecl);
-	  gimple_call_set_fntype (call, TREE_TYPE (fndecl));
-	  gimple_call_reset_alias_info (call);
-	}
-}
+mark_loops_in_oacc_kernels_region (region->entry, region->exit);
 
   basic_block entry_succ_bb = single_succ (entry_bb);
   if (offloaded && !gimple_in_ssa_p (cfun))
@@ -12654,8 +12616,7 @@ expand_omp_target (struct omp_region *region)
 	gsi_remove (, true);
 }
 
-  if (offloaded
-  && do_splitoff)
+  if (offloaded)
 {
   unsigned srcidx, dstidx, num;
 
@@ -12852,13 +12813,6 @@ expand_omp_target (struct omp_region *region)
   pop_cfun ();
 }
 
-  if (!do_emit_library_call)
-{
-  if (gimple_in_ssa_p (cfun))
-	update_ssa (TODO_update_ssa_only_virtuals);
-  return;
-}
-
   /* Emit a library call to launch the offloading region, or do data
 

[gomp4, committed, 4/9] Revert "Add pass_dominator::sese_mode_p ()"

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.


We needed the sese_mode_p functionality in the dominators pass to limit 
optimization scope to kernels regions. Now we no longer encounter 
kernels regions in the kernels pass group, but split-off functions that 
used to be kernels regions. So we no longer need this functionality.


This patch reverts the sese_mode_p functionality.

Thanks,
- Tom
Revert "Add pass_dominator::sese_mode_p ()"

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	revert:
	2015-10-12  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-dom.c (pass_dominator::jump_threading_p): Handle sese_mode_p.
	(pass_dominator::sese_mode_p, pass_dominator::get_sese): New protected
	virtual function.
	(pass_dominator::execute): Handle sese_mode_p.
---
 gcc/tree-ssa-dom.c | 78 ++
 1 file changed, 8 insertions(+), 70 deletions(-)

diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index e1cf38b..44253bf 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -44,7 +44,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-dom.h"
 #include "gimplify.h"
 #include "tree-cfgcleanup.h"
-#include "cfgcleanup.h"
 #include "omp-low.h"
 
 /* This file implements optimizations on the dominator tree.  */
@@ -535,17 +534,7 @@ class dominator_base : public gimple_opt_pass
   unsigned int execute (function *);
 
   /* Return true if pass should perform jump threading.  */
-  virtual bool jump_threading_p (void) { return !sese_mode_p (); }
-
-  /* Return true if pass should visit a series of seses rather than the whole
- dominator tree.  */
-  virtual bool sese_mode_p (void) { return false; }
-
-  /* In sese mode, return true if there's another sese to visit.  Return the
- sese to visit in SESE_ENTRY and SESE_EXIT.  */
-  virtual bool get_sese (basic_block *sese_entry ATTRIBUTE_UNUSED,
-			 basic_block *sese_exit ATTRIBUTE_UNUSED)
-{ gcc_unreachable (); }
+  virtual bool jump_threading_p (void) { return true; }
 }; // class dominator_base
 
 const pass_data pass_data_dominator =
@@ -602,14 +591,11 @@ dominator_base::execute (function *fun)
  LOOPS_HAVE_PREHEADERS won't be needed here.  */
   loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
 
-  if (!sese_mode_p ())
-/* Initialize the value-handle array.  */
-threadedge_initialize_values ();
+  /* Initialize the value-handle array.  */
+  threadedge_initialize_values ();
 
   if (jump_threading_p ())
 {
-  gcc_assert (!sese_mode_p ());
-
   /* We need accurate information regarding back edges in the CFG
 	 for jump threading; this may include back edges that are not part of
 	 a single loop.  */
@@ -631,29 +617,7 @@ dominator_base::execute (function *fun)
 			 const_and_copies,
 			 avail_exprs_stack,
 			 jump_threading_p ());
-  if (!sese_mode_p ())
-walker.walk (fun->cfg->x_entry_block_ptr);
-  else
-{
-  basic_block sese_entry, sese_exit;
-  while (get_sese (_entry, _exit))
-	{
-	  threadedge_initialize_values ();
-	  avail_exprs_stack->push_marker ();
-	  const_and_copies->push_marker ();
-
-	  walker.walk_until (sese_entry, sese_exit, true);
-
-	  avail_exprs_stack->pop_to_marker ();
-	  const_and_copies->pop_to_marker ();
-	  threadedge_finalize_values ();
-
-	  /* KLUDGE: The dom_walker does not allow unreachable blocks when
-	 starting the walk, and during the dom_opt_dom_walker walk we may
-	 produce unreachable blocks, so we need to clean them up here.  */
-	  delete_unreachable_blocks ();
-	}
-}
+  walker.walk (fun->cfg->x_entry_block_ptr);
 
   {
 gimple_stmt_iterator gsi;

[gomp4, committed, 5/9] Handle oacc function in parloops

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.


This patch removes handling of kernels regions in tree-parloops.c, and 
adds handling of oacc functions that used to be kernels regions before 
they were split off.


That means we no longer add a parallel pragma. OTOH, we now have to 
clear PROP_gimple_eomp in order to trigger the subsequent omp-expand pass.


Thanks,
- Tom

Handle oacc function in parloops

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (set_oacc_fn_attrib): Remove static.
	* omp-low.h (set_oacc_fn_attrib): Declare.
	* tree-parloops.c (create_parallel_loop): Remove region_entry parameter.
	Remove handling of oacc kernels pragma and GOACC_kernels_internal call.
	Remove insertion of oacc parallel pragma.  Set oacc function attributes.
	(gen_parallel_loop): Remove region_entry parameter.
	(get_omp_data_i_param): New function.
	(try_create_reduction_list): Use get_omp_data_i_param instead of
	gimple_stmt_omp_data_i_init_p.
	(ref_conflicts_with_region): Handle GIMPLE_RETURN.
	(oacc_entry_exit_ok_1): Same.  Add missing is_gimple_call test before
	gimple_call_internal_p test.
	(oacc_entry_exit_ok): Remove region_entry parameter.  Use
	get_omp_data_i_param instead of get_omp_data_i.  Set region_bbs to all
	bbs in function.  Use function entry as region entry.
	(parallelize_loops): Allow oacc functions and parallelized function if
	oacc_kernels_p.  Remove	region_entry variable.
	(pass_parallelize_loops_oacc_kernels::execute): Clear PROP_gimple_eomp
	if a loop was parallelized.
---
 gcc/omp-low.c   |   2 +-
 gcc/omp-low.h   |   1 +
 gcc/tree-parloops.c | 119 ++--
 3 files changed, 51 insertions(+), 71 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index ac8c8d0..58cb959 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12456,7 +12456,7 @@ replace_oacc_fn_attrib (tree fn, tree dims)
function attribute.  Push any that are non-constant onto the ARGS
list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
 
-static void
+void
 set_oacc_fn_attrib (tree fn, tree clauses, vec *args)
 {
   /* Must match GOMP_DIM ordering.  */
diff --git a/gcc/omp-low.h b/gcc/omp-low.h
index 7c9efdc..673b470 100644
--- a/gcc/omp-low.h
+++ b/gcc/omp-low.h
@@ -40,6 +40,7 @@ extern vec get_bbs_in_oacc_kernels_region (basic_block,
 extern void replace_oacc_fn_attrib (tree, tree);
 extern tree build_oacc_routine_dims (tree);
 extern tree get_oacc_fn_attrib (tree);
+extern void set_oacc_fn_attrib (tree, tree, vec *);
 
 extern GTY(()) vec<tree, va_gc> *offload_funcs;
 extern GTY(()) vec<tree, va_gc> *offload_vars;
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index f14cf8a..c038dfe 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2017,7 +2017,7 @@ transform_to_exit_first_loop (struct loop *loop,
 static void
 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
 		  tree new_data, unsigned n_threads, location_t loc,
-		  basic_block region_entry, bool oacc_kernels_p)
+		  bool oacc_kernels_p)
 {
   gimple_stmt_iterator gsi;
   basic_block bb, paral_bb, for_bb, ex_bb, continue_bb;
@@ -2039,10 +2039,6 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
   paral_bb = single_pred (bb);
   gsi = gsi_last_bb (paral_bb);
 }
-  else
-/* Make sure the oacc parallel is inserted on top of the oacc kernels
-   region.  */
-gsi = gsi_last_bb (region_entry);
 
   if (!oacc_kernels_p)
 {
@@ -2056,50 +2052,10 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
 }
   else
 {
-  /* Create oacc parallel pragma based on oacc kernels pragma and
-	 GOACC_kernels_internal call.  */
-  gomp_target *kernels =

[gomp4, committed, 9/9] Remove BUILT_IN_GOACC_KERNELS_INTERNAL

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.


This patch removes the no longer needed BUILT_IN_GOACC_KERNELS_INTERNAL 
builtin.


Thanks,
- Tom

Remove BUILT_IN_GOACC_KERNELS_INTERNAL

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	* omp-builtins.def (BUILT_IN_GOACC_KERNELS_INTERNAL): Remove
	DEF_GOACC_BUILTIN_FNSPEC.
	* omp-low.c (expand_omp_target): Remove handling of
	BUILT_IN_GOACC_KERNELS_INTERNAL.
---
 gcc/omp-builtins.def | 5 -
 gcc/omp-low.c| 1 -
 2 files changed, 6 deletions(-)

diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 0478eaf..6908f94 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -43,11 +43,6 @@ DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_ENTER_EXIT_DATA,
 			  BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
 			  ATTR_FNSPEC_DOT_DOT_DOT_r_r_r_NOTHROW_LIST,
 			  ATTR_NOTHROW_LIST, "...rrr")
-DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_KERNELS_INTERNAL,
-			  "GOACC_kernels_internal",
-			  BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
-			  ATTR_FNSPEC_DOT_DOT_DOT_r_r_r_NOTHROW_LIST,
-			  ATTR_NOTHROW_LIST, "...rrr")
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel_keyed",
 		   BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
 		   ATTR_NOTHROW_LIST)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 84accd9..643d017 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12994,7 +12994,6 @@ expand_omp_target (struct omp_region *region)
 	depend = build_int_cst (ptr_type_node, 0);
   args.quick_push (depend);
   break;
-case BUILT_IN_GOACC_KERNELS_INTERNAL:
 case BUILT_IN_GOACC_PARALLEL:
   {
 	set_oacc_fn_attrib (child_fn, clauses, );
-- 
1.9.1



[gomp4, committed] expand oacc kernels region at same time as oacc parallel region

2015-11-05 Thread Tom de Vries

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch ( 
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer 
need the kernels region to be a part of the original function when doing 
alias analysis.


So, we no longer have the need to postpone splitting off the kernels 
region into a separate function until after alias analysis, but we can 
do this at the same time as when we expand the parallel region.


The following patch series implements that:

 1  Move expansion of kernels region back to first omp-expand
 2  Update gate_oacc_kernels to handle oacc function
 3  Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
 4  Revert "Add pass_dominator::sese_mode_p ()"
 5  Handle oacc function in parloops
 6  Update goacc kernels C testcases
 7  Update goacc kernels Fortran testcases
 8  Release_defs in expand_omp_atomic_fetch_op
 9  Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as 
single commit. Various things in kernels support will be broken in 
intermediate stages. ]


Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.

Thanks,
- Tom


[gomp4, committed, 2/9] Update gate_oacc_kernels to handle oacc function

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.


This patch updates the kernels pass group gate function.

Before, it needed to trigger on functions containing kernel regions.
Now, it needs to trigger on oacc functions that used to be kernels 
regions before they were split off.


Furthermore, I've duplicated the parloops gate here 
(flag_tree_parallelize_loops > 1).  There's not much sense in running 
the pass group unless we're trying to parallelize.


Consequently, I needed to add a "-ftree-parallelize-loops=32" setting 
to a testcase which missed that setting.


Thanks,
- Tom

Update gate_oacc_kernels to handle oacc function

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-loop.c: Include omp-low.h.
	(gate_oacc_kernels): Test for flag_tree_parallelize_loops.  Test for
	oacc function attribute.  Test for loop with in_oacc_kernels_region.

	* c-c++-common/goacc/kernels-counter-var-redundant-load.c: Run with
	-ftree-parallelize-loops=32.
---
 .../goacc/kernels-counter-var-redundant-load.c |  1 +
 gcc/tree-ssa-loop.c| 22 +-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c b/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
index c4ffc1d..bf59838 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
@@ -1,4 +1,5 @@
 /* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
 /* { dg-additional-options "-fdump-tree-dom_oacc_kernels3" } */
 
 #include 
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 5e0b5a5..344c6c7 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-inline.h"
 #include "tree-scalar-evolution.h"
 #include "tree-vectorizer.h"
+#include "omp-low.h"
 
 
 /* A pass making sure loops are fixed up.  */
@@ -151,7 +152,26 @@ make_pass_tree_loop (gcc::context *ctxt)
 static bool
 gate_oacc_kernels (function *fn)
 {
-  return (fn->curr_properties & PROP_gimple_eomp) == 0;
+  if (flag_tree_parallelize_loops <= 1)
+return false;
+
+  tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
+  if (oacc_function_attr == NULL_TREE)
+return false;
+
+  tree val = TREE_VALUE (oacc_function_attr);
+  while (val != NULL_TREE && TREE_VALUE (val) == NULL_TREE)
+val = TREE_CHAIN (val);
+
+  if (val != NULL_TREE)
+return false;
+
+  struct loop *loop;
+  FOR_EACH_LOOP (loop, 0)
+if (loop->in_oacc_kernels_region)
+  return true;
+
+  return false;
 }
 
 /* The oacc kernels superpass.  */
-- 
1.9.1



[gomp4, committed, 3/9] Revert "Add skip_stmt parm to pass_dominator::get_sese ()"

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1  Move expansion of kernels region back to first omp-expand
  2  Update gate_oacc_kernels to handle oacc function
  3  Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4  Revert "Add pass_dominator::sese_mode_p ()"
  5  Handle oacc function in parloops
  6  Update goacc kernels C testcases
  7  Update goacc kernels Fortran testcases
  8  Release_defs in expand_omp_atomic_fetch_op
  9  Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.



We were using the skip_stmt functionality in pass_dominator to skip over 
a statement '.omp_data_i = &.omp_data_arr' in the kernels region.  That 
statement is removed during function split-off, so we no longer 
encounter that statement in the kernels pass group.


This patch reverts the skip_stmt functionality.

Thanks,
- Tom

Revert "Add skip_stmt parm to pass_dominator::get_sese ()"

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	Revert:
	2015-10-12  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-dom.c (dom_opt_dom_walker::set_skip_stmt): New function.
	(dom_opt_dom_walker::m_skip_stmt): New private var.
	(pass_dominator::get_sese): Add skip_stmt parameters.
	(pass_dominator::execute): Call set_skip_stmt with statement to skip for
	sese.
	(dom_opt_dom_walker::before_dom_children): Handle m_skip_stmt.
---
 gcc/tree-ssa-dom.c | 27 +--
 1 file changed, 5 insertions(+), 22 deletions(-)

diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index 6688a79..e1cf38b 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -505,7 +505,6 @@ public:
 
   virtual void before_dom_children (basic_block);
   virtual void after_dom_children (basic_block);
-  void set_skip_stmt (gimple *skip_stmt) { m_skip_stmt = skip_stmt; }
 
 private:
   void thread_across_edge (edge);
@@ -516,7 +515,6 @@ private:
 
   gcond *m_dummy_cond;
   bool m_jump_threading_p;
-  gimple *m_skip_stmt;
 };
 
 /* Jump threading, redundancy elimination and const/copy propagation.
@@ -544,11 +542,9 @@ class dominator_base : public gimple_opt_pass
   virtual bool sese_mode_p (void) { return false; }
 
   /* In sese mode, return true if there's another sese to visit.  Return the
- sese to visit in SESE_ENTRY and SESE_EXIT.  If a stmt in the sese should
- not be optimized, return it in SKIP_STMT.  */
+ sese to visit in SESE_ENTRY and SESE_EXIT.  */
   virtual bool get_sese (basic_block *sese_entry ATTRIBUTE_UNUSED,
-			 basic_block *sese_exit ATTRIBUTE_UNUSED,
-			 gimple **skip_stmt ATTRIBUTE_UNUSED)
+			 basic_block *sese_exit ATTRIBUTE_UNUSED)
 { gcc_unreachable (); }
 }; // class dominator_base
 
@@ -640,11 +636,8 @@ dominator_base::execute (function *fun)
   else
 {
   basic_block sese_entry, sese_exit;
-  gimple *skip_stmt = NULL;
-  while (get_sese (&sese_entry, &sese_exit, &skip_stmt))
+  while (get_sese (&sese_entry, &sese_exit))
 	{
-	  walker.set_skip_stmt (skip_stmt);
-
 	  threadedge_initialize_values ();
 	  avail_exprs_stack->push_marker ();
 	  const_and_copies->push_marker ();
@@ -797,8 +790,7 @@ public:
 protected:
   /* dominator_base methods: */
   virtual bool sese_mode_p (void) { return true; }
-  virtual bool get_sese (basic_block *sese_entry, basic_block *sese_exit,
-			 gimple **skip_stmt)
+  virtual bool get_sese (basic_block *sese_entry, basic_block *sese_exit)
   {
 if (m_regions == NULL)
   {
@@ -821,10 +813,6 @@ protected:
 *sese_entry = BASIC_BLOCK_FOR_FN (cfun, index);
 *sese_exit = get_oacc_kernels_region_exit (*sese_entry);
 
-tree omp_data_i = get_omp_data_i (single_pred (*sese_entry));
-if (omp_data_i != NULL_TREE)
-  *skip_stmt = SSA_NAME_DEF_STMT (omp_data_i);
-
 return true;
   }
 
@@ -1446,12 +1434,7 @@ dom_opt_dom_walker::before_dom_children (basic_block bb)
   m_avail_exprs_stack->pop_to_marker ();
 
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-{
-  if (gsi_stmt (gsi) == m_skip_stmt)
-	continue;
-
-  optimize_stmt (bb, gsi, m_const_and_copies, m_avail_exprs_stack);
-}
+optimize_stmt (bb, gsi, m_const_and_copies, m_avail_exprs_stack);
 
   /* Now prepare to process dominated blocks.  */
   if (m_jump_threading_p)
-- 
1.9.1



[gomp4, committed, 7/9] Update goacc kernels Fortran testcases

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1  Move expansion of kernels region back to first omp-expand
  2  Update gate_oacc_kernels to handle oacc function
  3  Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4  Revert "Add pass_dominator::sese_mode_p ()"
  5  Handle oacc function in parloops
  6  Update goacc kernels C testcases
  7  Update goacc kernels Fortran testcases
  8  Release_defs in expand_omp_atomic_fetch_op
  9  Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.


This patch updates the goacc kernels Fortran testcases

Thanks,
- Tom

Update goacc kernels Fortran testcases

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	* gfortran.dg/goacc/kernels-loop-2.f95: Scan for oacc function instead
	of oacc_parallel.
	* gfortran.dg/goacc/kernels-loop-data-2.f95:
	* gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95:
	* gfortran.dg/goacc/kernels-loop-data-enter-exit.f95:
	* gfortran.dg/goacc/kernels-loop-data-update.f95:
	* gfortran.dg/goacc/kernels-loop-data.f95:
	* gfortran.dg/goacc/kernels-loop-nest-independent.f95:
	* gfortran.dg/goacc/kernels-loop.f95:
	* gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95:
---
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95  | 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 | 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95  | 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95| 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95| 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95   | 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-nest-independent.f95   | 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95| 2 +-
 .../gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
index ccfc3ae..7fd6d4e 100644
--- a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
@@ -42,4 +42,4 @@ end program main
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
 
-! { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 3 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops_oacc_kernels" } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
index 2f46c6d..f788f67 100644
--- a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
@@ -48,4 +48,4 @@ end program main
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
 
-! { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 3 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops_oacc_kernels" } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
index 34b4a06..3599052 100644
--- a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
@@ -48,4 +48,4 @@ end program main
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
 
-! { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 3 "

[gomp4, committed, 6/9] Update goacc kernels C testcases

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1  Move expansion of kernels region back to first omp-expand
  2  Update gate_oacc_kernels to handle oacc function
  3  Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4  Revert "Add pass_dominator::sese_mode_p ()"
  5  Handle oacc function in parloops
  6  Update goacc kernels C testcases
  7  Update goacc kernels Fortran testcases
  8  Release_defs in expand_omp_atomic_fetch_op
  9  Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.



This patch updates the goacc kernels C testcases.

Thanks,
- Tom

Update goacc kernels C testcases

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	* c-c++-common/goacc/kernels-acc-loop-reduction.c: Scan for oacc
	function instead of oacc_parallel.
	* c-c++-common/goacc/kernels-acc-loop-smaller-equal.c: Same.
	* c-c++-common/goacc/kernels-counter-vars-function-scope.c: Same.
	* c-c++-common/goacc/kernels-double-reduction.c: Same.
	* c-c++-common/goacc/kernels-independent.c: Same.
	* c-c++-common/goacc/kernels-loop-2-acc-loop.c:  Same.
	* c-c++-common/goacc/kernels-loop-2.c:  Same.
	* c-c++-common/goacc/kernels-loop-3-acc-loop.c: Same.
	* c-c++-common/goacc/kernels-loop-3.c: Same.
	* c-c++-common/goacc/kernels-loop-acc-loop.c: Same.
	* c-c++-common/goacc/kernels-loop-data-2.c: Same.
	* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Same.
	* c-c++-common/goacc/kernels-loop-data-enter-exit.c: Same.
	* c-c++-common/goacc/kernels-loop-data-update.c: Same.
	* c-c++-common/goacc/kernels-loop-data.c: Same.
	* c-c++-common/goacc/kernels-loop-g.c: Same.
	* c-c++-common/goacc/kernels-loop-mod-not-zero.c: Same.
	* c-c++-common/goacc/kernels-loop-n-acc-loop.c: Same.
	* c-c++-common/goacc/kernels-loop-n.c: Same.
	* c-c++-common/goacc/kernels-loop-nest-independent.c: Same.
	* c-c++-common/goacc/kernels-loop-nest.c: Same.
	* c-c++-common/goacc/kernels-loop.c: Same.
	* c-c++-common/goacc/kernels-one-counter-var.c: Same.
	* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: Same.
	* c-c++-common/goacc/kernels-reduction.c: Same.
	* c-c++-common/goacc/kernels-counter-var-redundant-load.c: Same.  Update
	scan.
---
 gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c  | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c  | 2 +-
 .../c-c++-common/goacc/kernels-counter-var-redundant-load.c| 7 +++
 .../c-c++-common/goacc/kernels-counter-vars-function-scope.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c| 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-independent.c | 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c | 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c  | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c  | 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c  | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c| 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c| 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c  | 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c  | 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-nest-independent.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop.c| 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c | 3 ++-
 .../c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-reduction.c   | 3 ++-
 26 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-

[gomp4, committed, 8/9] Release_defs in expand_omp_atomic_fetch_op

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1  Move expansion of kernels region back to first omp-expand
  2  Update gate_oacc_kernels to handle oacc function
  3  Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4  Revert "Add pass_dominator::sese_mode_p ()"
  5  Handle oacc function in parloops
  6  Update goacc kernels C testcases
  7  Update goacc kernels Fortran testcases
  8  Release_defs in expand_omp_atomic_fetch_op
  9  Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.



The parloops pass constructs an atomic update:
...
  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);
...

The expand_omp_atomic_fetch_op function removes the update statement but 
doesn't release the ssa-name D.1840_60 defined by the update statement.


This causes an error when running ccp in lto1. We run into trouble here 
for this unreleased ssa-name, because SSA_NAME_VAR (var) == NULL_TREE 
and TREE_CODE (NULL_TREE) causes a sigsegv:

...
get_default_value (tree var)
{
  ccp_prop_value_t val = { UNINITIALIZED, NULL_TREE, 0 };
  gimple *stmt;

  stmt = SSA_NAME_DEF_STMT (var);

  if (gimple_nop_p (stmt))
{
  /* Variables defined by an empty statement are those used
 before being initialized.  If VAR is a local variable, we
 can assume initially that it is UNDEFINED, otherwise we must
 consider it VARYING.  */
  if (!virtual_operand_p (var)
  && TREE_CODE (SSA_NAME_VAR (var)) == VAR_DECL)
val.lattice_val = UNDEFINED;
...

This patch fixes the problem by releasing the ssa-name in 
expand_omp_atomic_fetch_op function.


Thanks,
- Tom
Release_defs in expand_omp_atomic_fetch_op

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (expand_omp_atomic_fetch_op):  Release defs of update stmt.
---
 gcc/omp-low.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 58cb959..84accd9 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -11914,10 +11914,14 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_bb (store_bb);
+  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);
 
   if (gimple_in_ssa_p (cfun))
-update_ssa (TODO_update_ssa_no_phi);
+{
+  release_defs (stmt);
+  update_ssa (TODO_update_ssa_no_phi);
+}
 
   return true;
 }
-- 
1.9.1



[gomp4, committed] Revert "Add counter inits to zero_iter_bb in expand_omp_for_init_counts"

2015-11-07 Thread Tom de Vries

Hi,

this patch reverts "Add counter inits to zero_iter_bb in 
expand_omp_for_init_counts". We no longer split off the kernels region 
in ssa-mode, so there's no need for this patch anymore.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Revert "Add counter inits to zero_iter_bb in expand_omp_for_init_counts"

2015-10-08  Tom de Vries  <t...@codesourcery.com>

	revert:
	2015-10-08  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (expand_omp_for_init_counts): Add inits for counters in
	zero_iter_bb.
	(expand_omp_for_generic): Remove TREE_NO_WARNING settings on counters.

	* gcc.dg/gomp/collapse-2.c: New test.
---
 gcc/omp-low.c  | 35 ++
 gcc/testsuite/gcc.dg/gomp/collapse-2.c | 19 --
 2 files changed, 14 insertions(+), 40 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.dg/gomp/collapse-2.c

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 437f8c1..76f1ae9 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -7437,7 +7437,6 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
 	  break;
 	}
 }
-  bool created_zero_iter_bb = false;
   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
 {
   tree itype = TREE_TYPE (fd->loops[i].v);
@@ -7493,7 +7492,6 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
 	  gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
 	  set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
    entry_bb);
-	  created_zero_iter_bb = true;
 	}
 	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
 	  ne->probability = REG_BR_PROB_BASE / 2000 - 1;
@@ -7547,25 +7545,6 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
 	  expand_omp_build_assign (gsi, fd->loop.n2, t);
 	}
 }
-
-  if (created_zero_iter_bb)
-{
-  /* Atm counts[0] doesn't seem to be used beyond create_zero_iter_bb,
-	 but for robustness-sake we include that one as well.  */
-  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
-	{
-	  tree var = counts[i];
-	  if (!SSA_VAR_P (var))
-	continue;
-
-	  tree zero = build_zero_cst (type);
-	  gassign *assign_stmt = gimple_build_assign (var, zero);
-	  basic_block &zero_iter_bb
-	= i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
-	  gimple_stmt_iterator gsi = gsi_after_labels (zero_iter_bb);
-	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
-	}
-}
 }
 
 
@@ -8237,6 +8216,7 @@ expand_omp_for_generic (struct omp_region *region,
   bool seq_loop = (start_fn == BUILT_IN_NONE || next_fn == BUILT_IN_NONE);
   edge e, ne;
   tree *counts = NULL;
+  int i;
   bool ordered_lastprivate = false;
 
   gcc_assert (!broken_loop || !in_combined_parallel);
@@ -8283,6 +8263,13 @@ expand_omp_for_generic (struct omp_region *region,
 
   if (zero_iter1_bb)
 	{
+	  /* Some counts[i] vars might be uninitialized if
+	 some loop has zero iterations.  But the body shouldn't
+	 be executed in that case, so just avoid uninit warnings.  */
+	  for (i = first_zero_iter1;
+	   i < (fd->ordered ? fd->ordered : fd->collapse); i++)
+	if (SSA_VAR_P (counts[i]))
+	  TREE_NO_WARNING (counts[i]) = 1;
	  gsi_prev (&gsi);
 	  e = split_block (entry_bb, gsi_stmt (gsi));
 	  entry_bb = e->dest;
@@ -8294,6 +8281,12 @@ expand_omp_for_generic (struct omp_region *region,
 	}
   if (zero_iter2_bb)
 	{
+	  /* Some counts[i] vars might be uninitialized if
+	 some loop has zero iterations.  But the body shouldn't
+	 be executed in that case, so just avoid uninit warnings.  */
+	  for (i = first_zero_iter2; i < fd->ordered; i++)
+	if (SSA_VAR_P (counts[i]))
+	  TREE_NO_WARNING (counts[i]) = 1;
 	  if (zero_iter1_bb)
 	make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
 	  else
diff --git a/gcc/testsuite/gcc.dg/gomp/collapse-2.c b/gcc/testsuite/gcc.dg/gomp/collapse-2.c
deleted file mode 100644
index 5319f89..000
--- a/gcc/testsuite/gcc.dg/gomp/collapse-2.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -fopenmp -fdump-tree-ssa" } */
-
-#define N 100
-
-int a[N][N];
-
-void
-foo (int m, int n)
-{
-  int i, j;
-#pragma omp parallel
-#pragma omp for collapse(2) schedule (runtime)
-  for (i = 0; i < m; i++)
-for (j = 0; j < n; j++)
-  a[i][j] = 1;
-}
-
-/* { dg-final { scan-tree-dump-not "(?n)PHI.*count.*\\(D\\)" "ssa" } } */
-- 
1.9.1



[gomp4, committed] Make formatting resemble trunk in f95-lang.c

2015-11-07 Thread Tom de Vries

Hi,

this patch removes formatting differences with trunk.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Make formatting resemble trunk in f95-lang.c

2015-11-07  Tom de Vries  <t...@codesourcery.com>

	* f95-lang.c: Make formatting resemble trunk.
---
 gcc/fortran/f95-lang.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c
index a63ebb3..40546e6 100644
--- a/gcc/fortran/f95-lang.c
+++ b/gcc/fortran/f95-lang.c
@@ -563,6 +563,7 @@ gfc_define_builtin (const char *name, tree type, enum built_in_function code,
   set_builtin_decl (code, decl, true);
 }
 
+
 #define DO_DEFINE_MATH_BUILTIN(code, name, argtype, tbase) \
 gfc_define_builtin ("__builtin_" name "l", tbase##longdouble[argtype], \
 			BUILT_IN_ ## code ## L, name "l", \
-- 
1.9.1



[gomp4, committed] Cleanup formatting of DEF_GOACC_BUILTINs

2015-11-07 Thread Tom de Vries

Hi,

this patch removes formatting differences with trunk.

Committed to gomp-4_0-branch.

Thanks,
- Tom
>From a77fd266102498a909886cecde1b57adf9350d90 Mon Sep 17 00:00:00 2001
From: Tom de Vries <t...@codesourcery.com>
Date: Fri, 6 Nov 2015 22:11:08 +0100
Subject: [PATCH 2/4] Cleanup formatting of DEF_GOACC_BUILTINs

2015-11-07  Tom de Vries  <t...@codesourcery.com>

	* omp-builtins.def: Cleanup formatting.
---
 gcc/omp-builtins.def | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 1504a48..e04edc2 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -32,12 +32,10 @@ along with GCC; see the file COPYING3.  If not see
 DEF_GOACC_BUILTIN (BUILT_IN_ACC_GET_DEVICE_TYPE, "acc_get_device_type",
 		   BT_FN_INT, ATTR_NOTHROW_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_START, "GOACC_data_start",
-		   BT_FN_VOID_INT_SIZE_PTR_PTR_PTR,
-		   ATTR_NOTHROW_LIST)
+		   BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_END, "GOACC_data_end",
 		   BT_FN_VOID, ATTR_NOTHROW_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ENTER_EXIT_DATA,
-		   "GOACC_enter_exit_data",
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ENTER_EXIT_DATA, "GOACC_enter_exit_data",
 		   BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
 		   ATTR_NOTHROW_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel_keyed",
-- 
1.9.1



[gomp4, committed] Remove no_overflow_tree_code

2015-11-07 Thread Tom de Vries

Hi,

this patch removes dead code from gomp-4_0-branch.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Remove no_overflow_tree_code

2015-11-07  Tom de Vries  <t...@codesourcery.com>

	* tree.c (no_overflow_tree_code): Remove.
	* tree.h (no_overflow_tree_code): Remove.
---
 gcc/tree.c | 24 
 gcc/tree.h |  1 -
 2 files changed, 25 deletions(-)

diff --git a/gcc/tree.c b/gcc/tree.c
index 535c2d1..c7a3313 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -7606,30 +7606,6 @@ associative_tree_code (enum tree_code code)
   return false;
 }
 
-/* Return true if CODE represents an tree code that cannot overflow, given
-   operand type OP_TYPE.  Otherwise return false.  */
-bool
-no_overflow_tree_code (enum tree_code code, tree op_type)
-{
-  /* For now, just handle associative tree codes.  */
-  switch (code)
-{
-case BIT_IOR_EXPR:
-case BIT_AND_EXPR:
-case BIT_XOR_EXPR:
-  return true;
-
-case MIN_EXPR:
-case MAX_EXPR:
-  return (ANY_INTEGRAL_TYPE_P (op_type)
-	  && TREE_CODE (op_type) != COMPLEX_TYPE);
-
-default:
-  break;
-}
-  return false;
-}
-
 /* Return true if CODE represents a commutative tree code.  Otherwise
return false.  */
 bool
diff --git a/gcc/tree.h b/gcc/tree.h
index 92d6a89..f3e2a48 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4451,7 +4451,6 @@ extern tree get_file_function_name (const char *);
 extern tree get_callee_fndecl (const_tree);
 extern int type_num_arguments (const_tree);
 extern bool associative_tree_code (enum tree_code);
-extern bool no_overflow_tree_code (enum tree_code, tree);
 extern bool commutative_tree_code (enum tree_code);
 extern bool commutative_ternary_tree_code (enum tree_code);
 extern bool operation_can_overflow (enum tree_code);
-- 
1.9.1



[gomp4, committed] Undo cgraph_node::release_body workaround

2015-11-07 Thread Tom de Vries

Hi,

this patch removes a workaround that's no longer needed, now that we 
split off the kernels region at the first omp-expand pass.


Committed to gomp-4_0-branch.

Thanks,
- Tom
>From 5e9a609006b45c51598a3d52d5ab55b72a186f67 Mon Sep 17 00:00:00 2001
From: Tom de Vries <t...@codesourcery.com>
Date: Fri, 6 Nov 2015 22:10:31 +0100
Subject: [PATCH 1/4] Undo cgraph_node::release_body workaround

2015-11-07  Tom de Vries  <t...@codesourcery.com>

	* cgraph.c (cgraph_node::release_body): Remove workaround.
---
 gcc/cgraph.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 8fe1ab4..7839c72 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1707,15 +1707,6 @@ release_function_body (tree decl)
 void
 cgraph_node::release_body (bool keep_arguments)
 {
-  /* The omp-expansion of the oacc kernels directive is post-poned till after
- all_small_ipa_passes.  That means pass_ipa_free_lang_data, which tries to
- release the body of the offload function, is run before omp_expand_target 
- can process the oacc kernels directive,  and omp_expand_target would crash
- trying to access the body.  This snippet works around this problem.
- FIXME: This should probably be fixed in a different way.  */
-  if (offloadable)
-return;
-
   ipa_transforms_to_apply.release ();
   if (!used_as_abstract_origin && symtab->state != PARSING)
 {
-- 
1.9.1



Re: [gomp4, committed] Implement -foffload-alias

2015-11-04 Thread Tom de Vries

On 04/11/15 09:47, Thomas Schwinge wrote:

+/* Check that the loop has been split off into a function.  */
>+/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo._omp_fn.0" 1 
"optimized" } } */

For C we get:

 ;; Function foo._omp_fn.0 (foo._omp_fn.0, funcdef_no=12, decl_uid=2534, 
cgraph_uid=14, symbol_order=14)

..., so that matches, but for C++ we get:

 ;; Function foo(unsigned int*, unsigned int*, unsigned int*) [clone 
._omp_fn.0] (_ZL3fooPjS_S_._omp_fn.0, funcdef_no=12, decl_uid=2416, 
cgraph_uid=14, symbol_order=14)

..., which doesn't match, so this directive FAILs.



Hi Thomas,

thanks for noticing.

Fixed as attached.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Fixup goacc/kernels-loop-offload-alias-none.c

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	* c-c++-common/goacc/kernels-loop-offload-alias-none.c: Fix
	foo._omp_fn.0 function name scanning.
---
 gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
index bb96330..79d8daa 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
@@ -49,7 +49,7 @@ main (void)
 }
 
 /* Check that the loop has been split off into a function.  */
-/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo._omp_fn.0" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*\\._omp_fn\\.0" 1 "optimized" } } */
 
 /* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */
 /* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */
-- 
1.9.1



[trivial, committed] Use decl_type in create_variable_info_for_1

2015-11-04 Thread Tom de Vries

Hi,

this patch uses the decl_type variable more consistently in 
create_variable_info_for_1.


Bootstrapped and reg-tested on x86_64.

Committed to trunk as trivial.

Thanks,
- Tom
Use decl_type in create_variable_info_for_1

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (create_variable_info_for_1): Use decl_type
	variable.
---
 gcc/tree-ssa-structalias.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index ded5a1e..98b5f16 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5718,8 +5718,8 @@ create_variable_info_for_1 (tree decl, const char *name, bool add_id)
   vi->fullsize = tree_to_uhwi (declsize);
   vi->size = vi->fullsize;
   vi->is_full_var = true;
-  if (POINTER_TYPE_P (TREE_TYPE (decl))
-	  && TYPE_RESTRICT (TREE_TYPE (decl)))
+  if (POINTER_TYPE_P (decl_type)
+	  && TYPE_RESTRICT (decl_type))
 	vi->only_restrict_pointers = 1;
   fieldstack.release ();
   return vi;
-- 
1.9.1



[committed] Handle recursive restrict in function parameter

2015-11-04 Thread Tom de Vries

On 04/11/15 10:28, Richard Biener wrote:

>I think I can postpone the creation of the heapvar till where you suggest in
>create_variable_info_for_1, but I'd still need a means
>to communicate the TREE_TYPE (field_type) from push_fields_onto_fieldstack to
>create_variable_info_for_1.
>
>A simple implementation would be a new field:
>...
>@@ -5195,6 +5197,8 @@ struct fieldoff
>unsigned may_have_pointers : 1;
>
>unsigned only_restrict_pointers : 1;
>+
>+ tree restrict_pointed_type;
>};
>...
>Which AFAIU will change fieldoff size.

It's ok to change fieldoff size if there is a reason to;)

Patch is ok along this line.


Updated patch accordingly.

Bootstrapped and reg-tested on x86_64.

Committed to trunk as attached.

Thanks,
- Tom
Handle recursive restrict in function parameter

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	PR tree-optimization/67742
	* tree-ssa-structalias.c (struct fieldoff): Add restrict_pointed_type
	field.
	(push_fields_onto_fieldstack): Handle restrict_pointed_type field.
	(create_variable_info_for_1): Add and handle handle_param parameter.
	Add restrict handling.
	(create_variable_info_for): Call create_variable_info_for_1 with extra
	arg.
	(make_param_constraints): Drop restrict_name parameter.  Ignore
	vi->only_restrict_pointers.
	(intra_create_variable_infos): Call create_variable_info_for_1 with
	extra arg.  Remove restrict handling.  Call make_param_constraints with
	one less arg.

	* gcc.dg/tree-ssa/restrict-7.c: New test.
	* gcc.dg/tree-ssa/restrict-8.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c | 12 +
 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c | 17 +++
 gcc/tree-ssa-structalias.c | 78 +-
 3 files changed, 74 insertions(+), 33 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
new file mode 100644
index 000..f7a68c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+int
+f (int *__restrict__ *__restrict__ *__restrict__ a, int *b)
+{
+  *b = 1;
+  ***a  = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
new file mode 100644
index 000..b0ab164
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+struct s
+{
+  int *__restrict__ *__restrict__ pp;
+};
+
+int
+f (struct s s, int *b)
+{
+  *b = 1;
+  **s.pp = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 98b5f16..52a35f6 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -307,6 +307,7 @@ static varinfo_t first_or_preceding_vi_for_offset (varinfo_t,
 		   unsigned HOST_WIDE_INT);
 static varinfo_t lookup_vi_for_tree (tree);
 static inline bool type_can_have_subvars (const_tree);
+static void make_param_constraints (varinfo_t);
 
 /* Pool of variable info structures.  */
 static object_allocator variable_info_pool
@@ -393,7 +394,6 @@ new_var_info (tree t, const char *name, bool add_id)
   return ret;
 }
 
-
 /* A map mapping call statements to per-stmt variables for uses
and clobbers specific to the call.  */
 static hash_map *call_stmt_vars;
@@ -5195,6 +5195,8 @@ struct fieldoff
   unsigned may_have_pointers : 1;
 
   unsigned only_restrict_pointers : 1;
+
+  tree restrict_pointed_type;
 };
 typedef struct fieldoff fieldoff_s;
 
@@ -5340,7 +5342,8 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 	if (!pair
 		&& offset + foff != 0)
 	  {
-		fieldoff_s e = {0, offset + foff, false, false, false, false};
+		fieldoff_s e
+		  = {0, offset + foff, false, false, false, false, NULL_TREE};
 		pair = fieldstack->safe_push (e);
 	  }
 
@@ -5374,6 +5377,8 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 		  = (!has_unknown_size
 		 && POINTER_TYPE_P (field_type)
 		 && TYPE_RESTRICT (field_type));
+		if (e.only_restrict_pointers)
+		  e.restrict_pointed_type = TREE_TYPE (field_type);
 		fieldstack->safe_push (e);
 	  }
 	  }
@@ -5642,10 +5647,11 @@ check_for_overlaps (vec fieldstack)
 
 /* Create a varinfo structure for NAME and DECL, and add it to VARMAP.
This will also create any varinfo structures necessary for fields
-   of DECL.  */
+   of DECL.  DECL is a function parameter if HANDLE_PARAM is set.  */
 
 static varinfo_t
-create_variable_info_for_1 (tree decl, const char *name, bool add_id

Re: [gomp4,committed] Handle recursive restrict in function parameter

2015-11-04 Thread Tom de Vries

On 03/11/15 14:58, Tom de Vries wrote:

This patch adds handling of all the restrict qualifiers in the type of a
function parameter.



And committed to gomp-4_0-branch.


I've reverted this patch, and backported the version from trunk.

Committed as attached.

Thanks,
- Tom
[PATCH 1/2] Revert "Handle recursive restrict in function parameter"

2015-11-04  Tom de Vries  <t...@codesourcery.com>

	revert:
	2015-10-03  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (struct fieldoff): Add restrict_var field.
	(push_fields_onto_fieldstack): Add and handle handle_param parameter.
	(create_variable_info_for_1): Add and handle
	handle_param parameter.  Add extra arg to call to
	push_fields_onto_fieldstack.  Handle restrict pointer fields.
	(create_variable_info_for): Call create_variable_info_for_1 with extra
	arg.
	(make_param_constraints): Drop restrict_name parameter.  Ignore
	vi->only_restrict_pointers.
	(intra_create_variable_infos): Call create_variable_info_for_1 with
	extra arg.  Remove restrict handling.  Call make_param_constraints with
	one less arg.

	* gcc.dg/tree-ssa/restrict-7.c: New test.
	* gcc.dg/tree-ssa/restrict-8.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c | 12 
 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c | 17 --
 gcc/tree-ssa-structalias.c | 90 --
 3 files changed, 36 insertions(+), 83 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
deleted file mode 100644
index f7a68c7..000
--- a/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-fre1" } */
-
-int
-f (int *__restrict__ *__restrict__ *__restrict__ a, int *b)
-{
-  *b = 1;
-  ***a  = 2;
-  return *b;
-}
-
-/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
deleted file mode 100644
index b0ab164..000
--- a/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
+++ /dev/null
@@ -1,17 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-fre1" } */
-
-struct s
-{
-  int *__restrict__ *__restrict__ pp;
-};
-
-int
-f (struct s s, int *b)
-{
-  *b = 1;
-  **s.pp = 2;
-  return *b;
-}
-
-/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 074285c..f4c875f 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -320,7 +320,6 @@ static varinfo_t first_or_preceding_vi_for_offset (varinfo_t,
 		   unsigned HOST_WIDE_INT);
 static varinfo_t lookup_vi_for_tree (tree);
 static inline bool type_can_have_subvars (const_tree);
-static void make_param_constraints (varinfo_t);
 
 /* Pool of variable info structures.  */
 static object_allocator variable_info_pool
@@ -407,7 +406,6 @@ new_var_info (tree t, const char *name, bool add_id)
   return ret;
 }
 
-static varinfo_t create_variable_info_for_1 (tree, const char *, bool, bool);
 
 /* A map mapping call statements to per-stmt variables for uses
and clobbers specific to the call.  */
@@ -5210,8 +5208,6 @@ struct fieldoff
   unsigned may_have_pointers : 1;
 
   unsigned only_restrict_pointers : 1;
-
-  varinfo_t restrict_var;
 };
 typedef struct fieldoff fieldoff_s;
 
@@ -5306,12 +5302,11 @@ field_must_have_pointers (tree t)
OFFSET is used to keep track of the offset in this entire
structure, rather than just the immediately containing structure.
Returns false if the caller is supposed to handle the field we
-   recursed for.  If HANDLE_PARAM is set, we're handling part of a function
-   parameter.  */
+   recursed for.  */
 
 static bool
 push_fields_onto_fieldstack (tree type, vec *fieldstack,
-			 HOST_WIDE_INT offset, bool handle_param)
+			 HOST_WIDE_INT offset)
 {
   tree field;
   bool empty_p = true;
@@ -5337,7 +5332,7 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 	|| TREE_CODE (field_type) == UNION_TYPE)
 	  push = true;
 	else if (!push_fields_onto_fieldstack
-		(field_type, fieldstack, offset + foff, handle_param)
+		(field_type, fieldstack, offset + foff)
 		 && (DECL_SIZE (field)
 		 && !integer_zerop (DECL_SIZE (field
 	  /* Empty structures may have actual size, like in C++.  So
@@ -5358,8 +5353,7 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 	if (!pair
 		&& offset + foff != 0)
 	  {
-		fieldoff_s e = {0, offset + foff, false, false, false, false,
-NULL};
+		fieldoff_s e = {0, offset + foff, false, false, false, false};
 		pair = fieldstack->safe_push (e);
 	  }
 
@@ -5393,19 +5387,6 @@ push_fie

Re: [PATCH, 3/6] Add recursion to make_param_constraints

2015-11-02 Thread Tom de Vries

On 02/11/15 16:25, Richard Biener wrote:

On Sun, 1 Nov 2015, Tom de Vries wrote:


>On 01/11/15 19:03, Tom de Vries wrote:

> >
> >So, the new patch series is:
> >
> >   1Rename make_restrict_var_constraints to make_param_constraints
> >   2Handle recursive restrict in function parameter
> >
> >I'll repost in reply to this message.

>
>This no-functional-changes patch:
>- moves the one constraint handling loop left in
>intra_create_variable_infos to make_restrict_var_constraints
>- renames make_restrict_var_constraints to make_param_constraints
>- adds a parameter toplevel to make_param_constraints to distinguish
>between the two calling contexts
>- adds a parameter restrict_name that allows to pass in the name of
>   restrict vars
>
>This patch was posted before at
>https://gcc.gnu.org/ml/gcc-patches/2015-10/msg03111.html  .

+ if (toplevel)
+   make_constraint_from (vi, nonlocal_id);
+ else
+   make_copy_constraint (vi, nonlocal_id);

I think make_constraint_from is what we want in both cases.



Committed as separate patch, as attached (1st patch).


Ok with this change (thus drop the toplevel parameter).



Committed as attached (2nd patch).

Thanks,
- Tom

Replace make_copy_constraint with make_constraint_from in make_restrict_var_constraints

2015-11-02  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (make_restrict_var_constraints): Replace
	make_copy_constraint call with make_constraint_from call.
---
 gcc/tree-ssa-structalias.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 55f72a2..773731d 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5890,7 +5890,7 @@ make_restrict_var_constraints (varinfo_t vi)
 	if (vi->only_restrict_pointers)
 	  make_constraint_from_global_restrict (vi, "GLOBAL_RESTRICT", true);
 	else
-	  make_copy_constraint (vi, nonlocal_id);
+	  make_constraint_from (vi, nonlocal_id);
       }
 }
 
-- 
1.9.1

Rename make_restrict_var_constraints to make_param_constraints

2015-10-27  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-structalias.c (make_restrict_var_constraints): Rename to ...
	(make_param_constraints): ... this.  Add and handle restrict_name
	parameter.  Handle is_full_var case.
	(intra_create_variable_infos): Use make_param_constraints.
---
 gcc/tree-ssa-structalias.c | 33 ++---
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 773731d..ded5a1e 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5879,19 +5879,22 @@ debug_solution_for_var (unsigned int var)
   dump_solution_for_var (stderr, var);
 }
 
-/* Register the constraints for restrict var VI.  */
+/* Register the constraints for function parameter related VI.  Use RESTRICT_NAME
+   as the base name of created restrict vars.  */
 
 static void
-make_restrict_var_constraints (varinfo_t vi)
+make_param_constraints (varinfo_t vi, const char *restrict_name)
 {
   for (; vi; vi = vi_next (vi))
-if (vi->may_have_pointers)
-  {
-	if (vi->only_restrict_pointers)
-	  make_constraint_from_global_restrict (vi, "GLOBAL_RESTRICT", true);
-	else
-	  make_constraint_from (vi, nonlocal_id);
-  }
+{
+  if (vi->only_restrict_pointers)
+	make_constraint_from_global_restrict (vi, restrict_name, true);
+  else if (vi->may_have_pointers)
+	make_constraint_from (vi, nonlocal_id);
+
+  if (vi->is_full_var)
+	break;
+}
 }
 
 /* Create varinfo structures for all of the variables in the
@@ -5928,19 +5931,11 @@ intra_create_variable_infos (struct function *fn)
 	  vi->is_restrict_var = 1;
 	  insert_vi_for_tree (heapvar, vi);
 	  make_constraint_from (p, vi->id);
-	  make_restrict_var_constraints (vi);
+	  make_param_constraints (vi, "GLOBAL_RESTRICT");
 	  continue;
 	}
 
-  for (; p; p = vi_next (p))
-	{
-	  if (p->only_restrict_pointers)
-	make_constraint_from_global_restrict (p, "PARM_RESTRICT", true);
-	  else if (p->may_have_pointers)
-	make_constraint_from (p, nonlocal_id);
-	  if (p->is_full_var)
-	break;
-	}
+  make_param_constraints (p, "PARM_RESTRICT");
 }
 
   /* Add a constraint for a result decl that is passed by reference.  */
-- 
1.9.1



[gomp4, committed] Revert omp_data_i init handling in copy_prop

2015-11-05 Thread Tom de Vries

Hi,

this patch reverts omp_data_i init handling in copy_prop.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Revert omp_data_i init handling in copy_prop

2015-11-05  Tom de Vries  <t...@codesourcery.com>

	revert:
	2015-04-21  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-copy.c (stmt_may_generate_copy): Handle .omp_data_i init
	conservatively.
---
 gcc/tree-ssa-copy.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/gcc/tree-ssa-copy.c b/gcc/tree-ssa-copy.c
index 4a3e4bd..4992bda 100644
--- a/gcc/tree-ssa-copy.c
+++ b/gcc/tree-ssa-copy.c
@@ -40,7 +40,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-scalar-evolution.h"
 #include "tree-ssa-dom.h"
 #include "tree-ssa-loop-niter.h"
-#include "omp-low.h"
 
 
 /* This file implements the copy propagation pass and provides a
@@ -96,9 +95,6 @@ stmt_may_generate_copy (gimple *stmt)
   if (gimple_has_volatile_ops (stmt))
 return false;
 
-  if (gimple_stmt_omp_data_i_init_p (stmt))
-return false;
-
   /* Statements with loads and/or stores will never generate a useful copy.  */
   if (gimple_vuse (stmt))
 return false;
-- 
1.9.1



[gomp4, committed] Remove gimple_stmt_omp_data_i_init_p

2015-11-05 Thread Tom de Vries

Hi,

this patch:
- removes a superfluous test on gimple_stmt_omp_data_i_init_p in
  tree-parloops.c
- removes unused functions gimple_stmt_omp_data_i_init_p and
  get_omp_data_i.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Remove gimple_stmt_omp_data_i_init_p

2015-11-05  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (gimple_stmt_omp_data_i_init_p, get_omp_data_i): Remove.
	* omp-low.h: Same.
	* tree-parloops.c (oacc_entry_exit_ok_1): Remove test on
	gimple_stmt_omp_data_i_init_p.
---
 gcc/omp-low.c   | 48 
 gcc/omp-low.h   |  2 --
 gcc/tree-parloops.c |  2 --
 3 files changed, 52 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 643d017..b3731e3 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -18681,54 +18681,6 @@ oacc_kernels_region_entry_p (basic_block bb, gomp_target **directive)
   return res;
 }
 
-/* Return true if STMT is copy assignment .omp_data_i = &.omp_data_arr.  */
-
-bool
-gimple_stmt_omp_data_i_init_p (gimple *stmt)
-{
-  /* Extract obj from stmt 'a =   */
-  if (!gimple_assign_cast_p (stmt)
-  && !gimple_assign_single_p (stmt))
-return false;
-  tree rhs = gimple_assign_rhs1 (stmt);
-  if (TREE_CODE (rhs) != ADDR_EXPR)
-return false;
-  tree obj = TREE_OPERAND (rhs, 0);
-
-  /* Check that the last statement in the preceding bb is an oacc kernels
- stmt.  */
-  basic_block bb = gimple_bb (stmt);
-  gomp_target *kernels;
-  if (!oacc_kernels_region_entry_p (bb, &kernels))
-return false;
-
-  /* Get omp_data_arr from the oacc kernels stmt.  */
-  tree data_arg = gimple_omp_target_data_arg (kernels);
-  tree omp_data_arr = TREE_VEC_ELT (data_arg, 0);
-
-  /* If obj is omp_data_arr, we've found the .omp_data_i init statement.  */
-  return operand_equal_p (obj, omp_data_arr, 0);
-}
-
-
-/* Return omp_data_i corresponding to the assignment
-   .omp_data_i = &.omp_data_arr in oacc kernels region entry REGION_ENTRY.  */
-
-tree
-get_omp_data_i (basic_block region_entry)
-{
-  if (!single_succ_p (region_entry))
-return NULL_TREE;
-  basic_block bb = single_succ (region_entry);
-  gimple_stmt_iterator gsi = gsi_start_bb (bb);
-  if (gsi_end_p (gsi))
-return NULL_TREE;
-  gimple *stmt = gsi_stmt (gsi);
-  if (!gimple_stmt_omp_data_i_init_p (stmt))
-return NULL_TREE;
-  return gimple_assign_lhs (stmt);
-}
-
 namespace {
 
 const pass_data pass_data_late_lower_omp =
diff --git a/gcc/omp-low.h b/gcc/omp-low.h
index 673b470..d1755a8 100644
--- a/gcc/omp-low.h
+++ b/gcc/omp-low.h
@@ -30,8 +30,6 @@ extern tree omp_reduction_init (tree, tree);
 extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
 extern void omp_finish_file (void);
 extern tree omp_member_access_dummy_var (tree);
-extern bool gimple_stmt_omp_data_i_init_p (gimple *);
-extern tree get_omp_data_i (basic_block);
 extern bool oacc_kernels_region_entry_p (basic_block, gomp_target **);
 extern basic_block get_oacc_kernels_region_exit (basic_block);
 extern basic_block loop_get_oacc_kernels_region_entry (struct loop *);
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index c038dfe..7e888d8 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2956,8 +2956,6 @@ oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec region_bbs,
 	}
 	  else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
 	continue;
-	  else if (gimple_stmt_omp_data_i_init_p (stmt))
-	continue;
 	  else if (!gimple_has_side_effects (stmt)
 		   && !gimple_could_trap_p (stmt)
 		   && !stmt_could_throw_p (stmt)
-- 
1.9.1



[gomp4, committed] Remove misc oacc kernels functions

2015-11-05 Thread Tom de Vries

Hi,

this patch removes a number of unused functions related to the oacc
kernels region.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Remove misc oacc kernels functions

2015-11-05  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (get_bbs_in_oacc_kernels_region):
	(loop_get_oacc_kernels_region_entry, get_oacc_kernels_region_exit)
	(oacc_kernels_region_entry_p): Remove.
	* omp-low.h: Same.
---
 gcc/omp-low.c | 134 --
 gcc/omp-low.h |   5 ---
 2 files changed, 139 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index b3731e3..debedb1 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12295,115 +12295,6 @@ mark_loops_in_oacc_kernels_region (basic_block region_entry,
   loop->in_oacc_kernels_region = true;
 }
 
-/* Return blocks in oacc kernels region delimited by REGION_ENTRY and
-   REGION_EXIT.  */
-
-vec
-get_bbs_in_oacc_kernels_region (basic_block region_entry,
- basic_block region_exit)
-{
-  bitmap excludes_bitmap = BITMAP_GGC_ALLOC ();
-  unsigned di;
-  basic_block bb;
-
-  bitmap_clear (excludes_bitmap);
-
-  /* Get all the blocks dominated by the region entry.  That will include the
- entire region.  */
-  vec dominated
-= get_all_dominated_blocks (CDI_DOMINATORS, region_entry);
-
-  bitmap_set_bit (excludes_bitmap, region_entry->index);
-
-  /* Exclude all the blocks which are not in the region: the blocks dominated by
- the region exit.  */
-  if (region_exit != NULL)
-{
-  vec excludes
-	= get_all_dominated_blocks (CDI_DOMINATORS, region_exit);
-  FOR_EACH_VEC_ELT (excludes, di, bb)
-	bitmap_set_bit (excludes_bitmap, bb->index);
-  bitmap_clear_bit (excludes_bitmap, region_exit->index);
-}
-
-  vec bbs = vNULL;
-
-  FOR_EACH_VEC_ELT (dominated, di, bb)
-if (!bitmap_bit_p (excludes_bitmap, bb->index))
-  bbs.safe_push (bb);
-
-  return bbs;
-}
-
-/* Return the entry basic block of the oacc kernels region containing LOOP.  */
-
-basic_block
-loop_get_oacc_kernels_region_entry (struct loop *loop)
-{
-  if (!loop->in_oacc_kernels_region)
-return NULL;
-
-  basic_block bb = loop->header;
-  while (true)
-{
-  bb = get_immediate_dominator (CDI_DOMINATORS, bb);
-  gcc_assert (bb != NULL);
-
-  gimple *last = last_stmt (bb);
-  if (last != NULL
-	  && gimple_code (last) == GIMPLE_OMP_TARGET
-	  && gimple_omp_target_kind (last) == GF_OMP_TARGET_KIND_OACC_KERNELS)
-	return bb;
-}
-}
-
-/* Return the oacc kernels region exit corresponding to REGION_ENTRY.  */
-
-basic_block
-get_oacc_kernels_region_exit (basic_block region_entry)
-{
-  gcc_checking_assert (oacc_kernels_region_entry_p (region_entry, NULL));
-
-  bitmap to_visit = BITMAP_ALLOC (NULL);
-  bitmap visited = BITMAP_ALLOC (NULL);
-  bitmap_clear (to_visit);
-  bitmap_clear (visited);
-
-  bitmap_set_bit (to_visit, region_entry->index);
-
-  basic_block bb;
-  while (true)
-{
-  if (bitmap_empty_p (to_visit))
-	{
-	  bb = NULL;
-	  break;
-	}
-
-  unsigned int index = bitmap_first_set_bit (to_visit);
-  bitmap_clear_bit (to_visit, index);
-  bitmap_set_bit (visited, index);
-  bb = BASIC_BLOCK_FOR_FN (cfun, index);
-
-  gimple *last = last_stmt (bb);
-  if (last != NULL
-	  && gimple_code (last) == GIMPLE_OMP_RETURN)
-	break;
-
-  edge_iterator ei;
-  for (ei = ei_start (bb->succs); !ei_end_p (ei); ei_next (&ei))
-	{
-	  edge e = ei_edge (ei);
-	  unsigned int dest_index = e->dest->index;
-	  if (!bitmap_bit_p (visited, dest_index))
-	bitmap_set_bit (to_visit, dest_index);
-	}
-}
-
-  BITMAP_FREE (to_visit);
-  return bb;
-}
-
 /* Encode an oacc launch argument.  This matches the GOMP_LAUNCH_PACK
macro on gomp-constants.h.  We do not check for overflow.  */
 
@@ -18656,31 +18547,6 @@ omp_finish_file (void)
 }
 }
 
-/* Return true if BB is an oacc kernels region entry.  If DIRECTIVE is non-null,
-   return the corresponding kernels directive in *DIRECTIVE.  */
-
-bool
-oacc_kernels_region_entry_p (basic_block bb, gomp_target **directive)
-{
-  /* Check that the last statement in the preceding bb is an oacc kernels
- stmt.  */
-  if (!single_pred_p (bb))
-return false;
-  gimple *last = last_stmt (single_pred (bb));
-  if (last == NULL
-  || gimple_code (last) != GIMPLE_OMP_TARGET)
-return false;
-  gomp_target *kernels = as_a <gomp_target *> (last);
-
-  bool res = (gimple_omp_target_kind (kernels)
-	  == GF_OMP_TARGET_KIND_OACC_KERNELS);
-
-  if (res && directive)
-*directive = kernels;
-
-  return res;
-}
-
 namespace {
 
 const pass_data pass_data_late_lower_omp =
diff --git a/gcc/omp-low.h b/gcc/omp-low.h
index d1755a8..e4c81b2 100644
--- a/gcc/omp-low.h
+++ b/gcc/omp-low.h
@@ -30,11 +30,6 @@ extern tree omp_reduction_init (tree, tree);
 extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
 extern void omp_finish_file (void);
 extern tree omp_m

[gomp4, committed] Revert omp_data_i init handling in ccp and forwprop

2015-11-05 Thread Tom de Vries

Hi,

This patch reverts the omp_data_i init handling in ccp and forwprop.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Revert omp_data_i init handling in ccp and forwprop

2015-11-05  Tom de Vries  <t...@codesourcery.com>

	Revert:
	2015-04-21  Tom de Vries  <t...@codesourcery.com>

	* tree-ssa-ccp.c: Include omp-low.h.
	(surely_varying_stmt_p, ccp_visit_stmt): Handle .omp_data_i init
	conservatively.
	* tree-ssa-forwprop.c: Include omp-low.h.
	(pass_forwprop::execute): Handle .omp_data_i init conservatively.
---
 gcc/tree-ssa-ccp.c  | 6 --
 gcc/tree-ssa-forwprop.c | 4 +---
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index cf93277..2831cfe 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -147,7 +147,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "builtins.h"
 #include "tree-chkp.h"
-#include "omp-low.h"
 
 
 /* Possible lattice values.  */
@@ -810,9 +809,6 @@ surely_varying_stmt_p (gimple *stmt)
   && gimple_code (stmt) != GIMPLE_CALL)
 return true;
 
-  if (gimple_stmt_omp_data_i_init_p (stmt))
-return true;
-
   return false;
 }
 
@@ -2368,8 +2364,6 @@ ccp_visit_stmt (gimple *stmt, edge *taken_edge_p, tree *output_p)
   switch (gimple_code (stmt))
 {
   case GIMPLE_ASSIGN:
-	if (gimple_stmt_omp_data_i_init_p (stmt))
-	  break;
 /* If the statement is an assignment that produces a single
output value, evaluate its RHS to see if the lattice value of
its output has changed.  */
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index bb08a4a..491178d 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -60,7 +60,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-cfgcleanup.h"
 #include "tree-into-ssa.h"
 #include "cfganal.h"
-#include "omp-low.h"
 
 /* This pass propagates the RHS of assignment statements into use
sites of the LHS of the assignment.  It's basically a specialized
@@ -2132,8 +2131,7 @@ pass_forwprop::execute (function *fun)
 	  tree lhs, rhs;
 	  enum tree_code code;
 
-	  if (!is_gimple_assign (stmt)
-	  || gimple_stmt_omp_data_i_init_p (stmt))
+	  if (!is_gimple_assign (stmt))
 	{
 	  gsi_next ();
 	  continue;
-- 
1.9.1



[PATCH] Fix transform_to_exit_first_loop_alt with -g

2015-11-06 Thread Tom de Vries

Hi,

This patch fixes a problem with -g compilation in 
transform_to_exit_first_loop_alt.


Consider test-case test.c:
...
void
f (int *a, int n)
{
  int i;
  for (i = 0; i < n; ++i)
a[i] = 1;
}
...

If we add a "checking_verify_ssa (true, true)" call at the end of 
transform_to_exit_first_loop_alt, and we compile with "-g -O2 
-ftree-parallelize-loops=4", we run into this ICE:

...
test.c: In function ‘f’:
test.c:2:1: error: definition in block 5 does not dominate use in block 13
for SSA_NAME: i_10 in statement:
# DEBUG i => i_10
test.c:2:1: internal compiler error: verify_ssa failed
...

Before transform_to_exit_first_loop_alt, the loop looks like:
...
  :

  :
  # ivtmp_22 = PHI <0(11), ivtmp_23(7)>
  i_13 = ivtmp_22;
  # DEBUG i => i_13
  _5 = (long unsigned int) i_13;
  _6 = _5 * 4;
  _8 = a_7(D) + _6;
  *_8 = 1;
  i_10 = i_13 + 1;
  # DEBUG i => i_10
  # DEBUG i => i_10
  if (ivtmp_22 < _1)
goto ;
  else
goto ;

  :
  ivtmp_23 = ivtmp_22 + 1;
  goto ;
...


And after transform_to_exit_first_loop_alt, it looks like:
...
  :
  goto ;

  :
  # ivtmp_22 = PHI <ivtmp_25(13)>
  i_13 = ivtmp_22;
  # DEBUG i => i_13
  _5 = (long unsigned int) i_13;
  _6 = _5 * 4;
  _8 = a_7(D) + _6;
  *_8 = 1;
  i_10 = i_13 + 1;
  goto ;

  :
  # ivtmp_25 = PHI <ivtmp_23(7), 0(11)>
  # DEBUG i => i_10
  # DEBUG i => i_10
  if (ivtmp_25 < _2)
goto ;
  else
goto ;

  :
  ivtmp_23 = ivtmp_22 + 1;
  goto ;
...

The ICE triggers because the use of i_10 in debug insn 'DEBUG i => i_10' 
in bb 13 is no longer dominated by the definition of i_10 in bb 5.


The patch fixes the ICE by ensuring that 
gimple_split_block_before_cond_jump really splits before cond_jump, 
instead of after the last nondebug insn before cond_jump, as it does 
now. This behaviour also better matches the rtl implementation of the 
cfghook. Btw, note that the only user of cfghook 
split_block_before_cond_jump is transform_to_exit_first_loop_alt.


[ A similar fix for an openacc variant of this ICE was committed on the 
gomp-4_0-branch: https://gcc.gnu.org/ml/gcc-patches/2015-07/msg00060.html ]


Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom
Fix transform_to_exit_first_loop_alt with -g

2015-11-06  Tom de Vries  <t...@codesourcery.com>

	* tree-cfg.c (gimple_split_block_before_cond_jump): Split before
	cond_jump, instead of split after last nondebug insn before cond_jump.
	* tree-parloops.c (transform_to_exit_first_loop_alt): Verify ssa before
	returning.
---
 gcc/tree-cfg.c  | 2 +-
 gcc/tree-parloops.c | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index cfed3c2..5d98eec 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -5783,7 +5783,7 @@ gimple_split_block_before_cond_jump (basic_block bb)
   if (gimple_code (last) != GIMPLE_COND
   && gimple_code (last) != GIMPLE_SWITCH)
 return NULL;
-  gsi_prev_nondebug (&gsi);
+  gsi_prev (&gsi);
   split_point = gsi_stmt (gsi);
   return split_block (bb, split_point)->dest;
 }
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 6c85634..3d41275 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -1737,6 +1737,8 @@ transform_to_exit_first_loop_alt (struct loop *loop,
   /* Recalculate dominance info.  */
   free_dominance_info (CDI_DOMINATORS);
   calculate_dominance_info (CDI_DOMINATORS);
+
+  checking_verify_ssa (true, true);
 }
 
 /* Tries to moves the exit condition of LOOP to the beginning of its header
-- 
1.9.1



[gomp4, committed] Revert "Add dom_walker::walk_until"

2015-11-06 Thread Tom de Vries

Hi,

this patch reverts the "Add dom_walker::walk_until" patch.

The dom_walker::walk_until functionality is no longer required now that 
we've reverted pass_dominator::sese_mode_p.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Revert "Add dom_walker::walk_until"

2015-11-06  Tom de Vries  <t...@codesourcery.com>

	revert:
	2015-10-12  Tom de Vries  <t...@codesourcery.com>

	* domwalk.c (dom_walker::walk): Rename to ...
	(dom_walker::walk_until): ... this.  Add and handle until and
	until_inclusive parameters.
	(dom_walker::walk): Reimplement using dom_walker::walk_until.
	* domwalk.h (dom_walker::walk_until): Declare.
---
 gcc/domwalk.c | 32 +---
 gcc/domwalk.h |  2 --
 2 files changed, 5 insertions(+), 29 deletions(-)

diff --git a/gcc/domwalk.c b/gcc/domwalk.c
index 6a205f0..167fc38 100644
--- a/gcc/domwalk.c
+++ b/gcc/domwalk.c
@@ -143,18 +143,11 @@ cmp_bb_postorder (const void *a, const void *b)
 }
 
 /* Recursively walk the dominator tree.
-   BB is the basic block we are currently visiting.  UNTIL is a basic_block that
-   is the root of a subtree that we won't visit.  If UNTIL_INCLUSIVE, we visit
-   UNTIL, but not it's children.  Otherwise don't visit UNTIL and its
-   children.  */
+   BB is the basic block we are currently visiting.  */
 
 void
-dom_walker::walk_until (basic_block bb, basic_block until, bool until_inclusive)
+dom_walker::walk (basic_block bb)
 {
-  bool skip_self = (bb == until && !until_inclusive);
-  if (skip_self)
-return;
-
   basic_block dest;
   basic_block *worklist = XNEWVEC (basic_block,
    n_basic_blocks_for_fn (cfun) * 2);
@@ -188,15 +181,9 @@ dom_walker::walk_until (basic_block bb, basic_block until, bool until_inclusive)
 	  worklist[sp++] = NULL;
 
 	  int saved_sp = sp;
-	  bool skip_children = bb == until && until_inclusive;
-	  if (!skip_children)
-	for (dest = first_dom_son (m_dom_direction, bb);
-		 dest; dest = next_dom_son (m_dom_direction, dest))
-	  {
-		bool skip_child = (dest == until && !until_inclusive);
-		if (!skip_child)
-		  worklist[sp++] = dest;
-	  }
+	  for (dest = first_dom_son (m_dom_direction, bb);
+	   dest; dest = next_dom_son (m_dom_direction, dest))
+	worklist[sp++] = dest;
 	  if (m_dom_direction == CDI_DOMINATORS)
 	switch (sp - saved_sp)
 	  {
@@ -230,12 +217,3 @@ dom_walker::walk_until (basic_block bb, basic_block until, bool until_inclusive)
 }
   free (worklist);
 }
-
-/* Recursively walk the dominator tree.
-   BB is the basic block we are currently visiting.  */
-
-void
-dom_walker::walk (basic_block bb)
-{
-  walk_until (bb, NULL, true);
-}
diff --git a/gcc/domwalk.h b/gcc/domwalk.h
index 71e6075..71a7c47 100644
--- a/gcc/domwalk.h
+++ b/gcc/domwalk.h
@@ -34,8 +34,6 @@ public:
 
   /* Walk the dominator tree.  */
   void walk (basic_block);
-  /* Walk a part of the dominator tree.  */
-  void walk_until (basic_block, basic_block, bool);
 
   /* Function to call before the recursive walk of the dominator children.  */
   virtual void before_dom_children (basic_block) {}
-- 
1.9.1



[gomp4, committed] Fix double word typo in tree-inline.c

2015-11-06 Thread Tom de Vries

Hi,

reverting a patch in tree-inline.c in gomp-4_0-branch exposed a typo 
already fixed on trunk.  This patch fixes that.


Committed to gomp-4_0-branch.

Thanks,
- Tom

2015-11-06  Tom de Vries  <t...@codesourcery.com>

	backport from trunk:
	2015-07-12  Aldy Hernandez  <al...@redhat.com>

	* tree-inline.c: Fix double word typos.
---
 gcc/tree-inline.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 3d06e6e..884131f 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -4540,7 +4540,7 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
   id->src_cfun = DECL_STRUCT_FUNCTION (fn);
   id->call_stmt = stmt;
 
-  /* If the the src function contains an IFN_VA_ARG, then so will the dst
+  /* If the src function contains an IFN_VA_ARG, then so will the dst
  function after inlining.  */
   if ((id->src_cfun->curr_properties & PROP_gimple_lva) == 0)
 {
-- 
1.9.1



[gomp4, committed] Remove DEF_GOACC_BUILTIN_FNSPEC

2015-11-06 Thread Tom de Vries

[ was: Re: [gomp4, committed] Revert "Add IFN_GOACC_DATA_END_WITH_ARG" ]

On 06/11/15 13:03, Tom de Vries wrote:


Now that we've got -foffload-alias, we're no longer concerned about
GOACC builtins being alias analysis optimization barriers, so the
IFN_GOACC_DATA_END_WITH_ARG patch has become obsolete.


Likewise, DEF_GOACC_BUILTIN_FNSPEC has become obsolete.

This patch removes DEF_GOACC_BUILTIN_FNSPEC and associated code.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Remove DEF_GOACC_BUILTIN_FNSPEC

2015-11-06  Tom de Vries  <t...@codesourcery.com>

	* builtins.def (DEF_GOACC_BUILTIN_FNSPEC): Remove #undef and #define.
	* omp-builtins.def: Remove DEF_GOACC_BUILTIN_FNSPEC.

	* f95-lang.c (gfc_init_builtin_functions): Remove.
	(DEF_GOACC_BUILTIN_FNSPEC): Remove #undef and #define.
---
 gcc/builtins.def   |  7 ---
 gcc/fortran/f95-lang.c | 32 
 gcc/omp-builtins.def   | 24 ++--
 3 files changed, 10 insertions(+), 53 deletions(-)

diff --git a/gcc/builtins.def b/gcc/builtins.def
index d60b037..886b45c 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -174,13 +174,6 @@ along with GCC; see the file COPYING3.  If not see
 	   false, true, true, ATTRS, false, \
 	   (flag_openacc \
 		|| flag_offload_abi != OFFLOAD_ABI_UNSET))
-/* Like DEF_GOACC_BUILTIN, but with an fn spec attribute.
-   KLUDGE: The ATTRS field needs to be a combination of ATTRS2 and FNSPEC.
-   In this file, we use the ATTRS field, and in gcc/fortran/f95-lang.c, we use
-   ATTRS2 and FNSPEC instead.  */
-#undef DEF_GOACC_BUILTIN_FNSPEC
-#define DEF_GOACC_BUILTIN_FNSPEC(ENUM, NAME, TYPE, ATTRS, ATTRS2, FNSPEC) \
-  DEF_GOACC_BUILTIN(ENUM, NAME, TYPE, ATTRS)
 #undef DEF_GOACC_BUILTIN_COMPILER
 #define DEF_GOACC_BUILTIN_COMPILER(ENUM, NAME, TYPE, ATTRS) \
   DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE,\
diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c
index 56a30ca..a63ebb3 100644
--- a/gcc/fortran/f95-lang.c
+++ b/gcc/fortran/f95-lang.c
@@ -563,27 +563,6 @@ gfc_define_builtin (const char *name, tree type, enum built_in_function code,
   set_builtin_decl (code, decl, true);
 }
 
-/* Like gfc_define_builtin, but with fn spec attribute FNSPEC.  */
-
-static void ATTRIBUTE_UNUSED
-gfc_define_builtin_with_spec (const char *name, tree fntype,
-			  enum built_in_function code,
-			  const char *library_name, int attr,
-			  const char *fnspec)
-{
-  if (fnspec)
-{
-  /* Code copied from build_library_function_decl_1.  */
-  tree attr_args = build_tree_list (NULL_TREE,
-	build_string (strlen (fnspec), fnspec));
-  tree attrs = tree_cons (get_identifier ("fn spec"),
-			  attr_args, TYPE_ATTRIBUTES (fntype));
-  fntype = build_type_attribute_variant (fntype, attrs);
-}
-
-  gfc_define_builtin (name, fntype, code, library_name, attr);
-}
-
 #define DO_DEFINE_MATH_BUILTIN(code, name, argtype, tbase) \
 gfc_define_builtin ("__builtin_" name "l", tbase##longdouble[argtype], \
 			BUILT_IN_ ## code ## L, name "l", \
@@ -1236,12 +1215,6 @@ gfc_init_builtin_functions (void)
 #define DEF_GOACC_BUILTIN(code, name, type, attr) \
   gfc_define_builtin ("__builtin_" name, builtin_types[type], \
 			  code, name, attr);
-/* Like DEF_GOACC_BUILTIN, but with an fn spec attribute.
-   KLUDGE: See gcc/builtins.def DEF_GOACC_BUILTIN_FNSPEC comment.  */
-#undef DEF_GOACC_BUILTIN_FNSPEC
-#define DEF_GOACC_BUILTIN_FNSPEC(code, name, type, attr, attr2, fnspec)	\
-  gfc_define_builtin_with_spec ("__builtin_" name, builtin_types[type], \
-code, name, attr2, fnspec);
 #undef DEF_GOACC_BUILTIN_COMPILER
 #define DEF_GOACC_BUILTIN_COMPILER(code, name, type, attr) \
   gfc_define_builtin (name, builtin_types[type], code, name, attr);
@@ -1249,7 +1222,6 @@ gfc_init_builtin_functions (void)
 #define DEF_GOMP_BUILTIN(code, name, type, attr) /* ignore */
 #include "../omp-builtins.def"
 #undef DEF_GOACC_BUILTIN
-#undef DEF_GOACC_BUILTIN_FNSPEC
 #undef DEF_GOACC_BUILTIN_COMPILER
 #undef DEF_GOMP_BUILTIN
 }
@@ -1258,9 +1230,6 @@ gfc_init_builtin_functions (void)
 {
 #undef DEF_GOACC_BUILTIN
 #define DEF_GOACC_BUILTIN(code, name, type, attr) /* ignore */
-#undef DEF_GOACC_BUILTIN_FNSPEC
-#define DEF_GOACC_BUILTIN_FNSPEC(code, name, type, attr, attr2, fnspec)	\
-  /* Ignore.  */
 #undef DEF_GOACC_BUILTIN_COMPILER
 #define DEF_GOACC_BUILTIN_COMPILER(code, name, type, attr)  /* ignore */
 #undef DEF_GOMP_BUILTIN
@@ -1269,7 +1238,6 @@ gfc_init_builtin_functions (void)
 			  code, name, attr);
 #include "../omp-builtins.def"
 #undef DEF_GOACC_BUILTIN
-#undef DEF_GOACC_BUILTIN_FNSPEC
 #undef DEF_GOACC_BUILTIN_COMPILER
 #undef DEF_GOMP_BUILTIN
 }
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 6908f94..1504a48 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -21,7 +21

Revert "Add kernels-loop-nest-independent.f95"

2015-11-06 Thread Tom de Vries

Hi,

this patch reverts another test-case related to the independent clause 
in an oacc kernels region.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Revert "Add kernels-loop-nest-independent.f95"

2015-07-15  Tom de Vries  <t...@codesourcery.com>

	* gfortran.dg/goacc/kernels-loop-nest-independent.f95: New test.

	* testsuite/libgomp.oacc-fortran/kernels-loop-nest-independent.f95: New
	test.
---
 .../goacc/kernels-loop-nest-independent.f95| 41 --
 .../kernels-loop-nest-independent.f95  | 28 ---
 2 files changed, 69 deletions(-)
 delete mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-loop-nest-independent.f95
 delete mode 100644 libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-nest-independent.f95

diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-nest-independent.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-nest-independent.f95
deleted file mode 100644
index af151ca..000
--- a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-nest-independent.f95
+++ /dev/null
@@ -1,41 +0,0 @@
-! { dg-do compile }
-! { dg-additional-options "-O2" }
-! { dg-additional-options "-ftree-parallelize-loops=32" }
-! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
-! { dg-additional-options "-fdump-tree-optimized" }
-
-! Based on autopar/outer-1.c.
-
-program main
-  implicit none
-  integer, parameter :: n = 500
-  integer, dimension (0:n-1, 0:n-1) :: x
-  integer:: i, j, ii, jj
-
-
-  !$acc kernels copyout (x)
-  !$acc loop independent
-  do ii = 0, n - 1
- do jj = 0, n - 1
-x(jj, ii) = ii + jj + 3
- end do
-  end do
-  !$acc end kernels
-
-  do i = 0, n - 1
- do j = 0, n - 1
-if (x(i, j) .ne. i + j + 3) call abort
- end do
-  end do
-
-end program main
-
-! Check that only one loop is analyzed, and that it can be parallelized.
-! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized, marked independent" 1 "parloops_oacc_kernels" } }
-! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
-! { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops_oacc_kernels" } }
-
-! Check that the loop has been split off into a function.
-! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops_oacc_kernels" } }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-nest-independent.f95 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-nest-independent.f95
deleted file mode 100644
index 87a3d23..000
--- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-nest-independent.f95
+++ /dev/null
@@ -1,28 +0,0 @@
-! { dg-do run }
-! { dg-options "-ftree-parallelize-loops=32" }
-
-! Based on autopar/outer-1.c.
-
-program main
-  implicit none
-  integer, parameter :: n = 500
-  integer, dimension (0:n-1, 0:n-1) :: x
-  integer:: i, j, ii, jj
-
-
-  !$acc kernels copyout (x)
-  !$acc loop independent
-  do ii = 0, n - 1
- do jj = 0, n - 1
-x(jj, ii) = ii + jj + 3
- end do
-  end do
-  !$acc end kernels
-
-  do i = 0, n - 1
- do j = 0, n - 1
-if (x(i, j) .ne. i + j + 3) call abort
- end do
-  end do
-
-end program main
-- 
1.9.1



[gomp4, committed] Revert "Use marked_independent in oacc kernels region"

2015-11-06 Thread Tom de Vries

Hi,

this patch reverts the independent clause support in the oacc kernels 
region.


The independent clause support is broken, in a subtle way. We currently 
set the marked_independent field in struct loop for loops with the 
independent clause in a kernels region. So that property holds for all 
the loads and stores present at source level. But, at omp-lowering, we 
introduce new loads and stores. Those new loads and stores are supposed 
to be eliminated from the loop by the kernels pass group. But in 
general, we can't guarantee that that happens. So, at parloops, we 
cannot assume based on marked_independent that in fact all loads and 
stores in the loop body are independent.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Revert "Use marked_independent in oacc kernels region"

2015-10-20  Tom de Vries  <t...@codesourcery.com>

	Revert:
	2015-07-14  Tom de Vries  <t...@codesourcery.com>

	* tree-parloops.c (parallelize_loops): Use marked_independent flag in
	oacc kernels region.

	* c-c++-common/goacc/kernels-independent.c: New test.

	* testsuite/libgomp.oacc-c-c++-common/kernels-independent.c: New test.
---
 .../c-c++-common/goacc/kernels-independent.c   | 41 
 gcc/tree-parloops.c| 21 ++
 .../kernels-independent.c  | 45 --
 3 files changed, 3 insertions(+), 104 deletions(-)
 delete mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-independent.c
 delete mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-independent.c b/gcc/testsuite/c-c++-common/goacc/kernels-independent.c
deleted file mode 100644
index 1f36323..000
--- a/gcc/testsuite/c-c++-common/goacc/kernels-independent.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
-/* { dg-additional-options "-fdump-tree-optimized" } */
-
-#include <stdlib.h>
-
-#define N (1024 * 512)
-#define COUNTERTYPE unsigned int
-
-void
-foo (unsigned int *a,  unsigned int *b,  unsigned int *c)
-{
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-a[i] = i * 2;
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-b[i] = i * 4;
-
-#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
-  {
-#pragma acc loop independent
-for (COUNTERTYPE ii = 0; ii < N; ii++)
-  c[ii] = a[ii] + b[ii];
-  }
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-if (c[i] != a[i] + b[i])
-  abort ();
-}
-
-/* Check that only one loop is analyzed, and that it can be parallelized.  */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized, marked independent" 1 "parloops_oacc_kernels" } } */
-/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
-
-/* Check that the loop has been split off into a function.  */
-/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*\\._omp_fn\\.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops_oacc_kernels" } } */
-
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 05827d1..b4039ad 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -3258,24 +3258,9 @@ parallelize_loops (bool oacc_kernels_p)
   if (!try_create_reduction_list (loop, _list, oacc_kernels_p))
 	continue;
 
-  if (!flag_loop_parallelize_all)
-	{
-	  bool independent = (oacc_kernels_p
-			  && loop->marked_independent);
-
-	  if (independent)
-	{
-	  if (dump_file
-		  && (dump_flags & TDF_DETAILS))
-		fprintf (dump_file,
-			 "  SUCCESS: may be parallelized, marked independent\n");
-	}
-	  else
-	independent = loop_parallel_p (loop, _obstack);
-
-	  if (!independent)
-	continue;
-	}
+  if (!flag_loop_parallelize_all
+	  && !loop_parallel_p (loop, _obstack))
+	continue;
 
   if (oacc_kernels_p
 	&& !oacc_entry_exit_ok (loop, _list))
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c
deleted file mode 100644
index d169a5f..000
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
-#include 
-
-#define N (1024 * 512)
-#define COUNTERTYPE unsigned int
-
-void __attribute__((noinline,noclone))
-foo (unsigned int *a,  unsigned int *b,  unsigned int *c)
-{
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-a[i] = i * 2;
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-b[i] = i * 4;
-
-#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
-  {
-#pragma acc loop i

[gomp4, committed] Revert "Add c-c++-common/goacc/kernels-loop-nest-independent.c"

2015-11-06 Thread Tom de Vries

Hi,

this patch reverts a test-case related to the independent clause in an 
oacc kernels region.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Revert "Add c-c++-common/goacc/kernels-loop-nest-independent.c"

2015-10-2  Tom de Vries  <t...@codesourcery.com>

	Revert:
	2015-07-15  Tom de Vries  <t...@codesourcery.com>

	* c-c++-common/goacc/kernels-loop-nest-independent.c: New test.
---
 .../goacc/kernels-loop-nest-independent.c  | 40 --
 1 file changed, 40 deletions(-)
 delete mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-nest-independent.c

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest-independent.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest-independent.c
deleted file mode 100644
index 1996865..000
--- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest-independent.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
-/* { dg-additional-options "-fdump-tree-optimized" } */
-
-/* Based on autopar/outer-1.c.  */
-
-#include <stdlib.h>
-
-#define N 1000
-
-int
-main (void)
-{
-  int x[N][N];
-
-#pragma acc kernels copyout (x)
-  {
-#pragma acc loop independent
-for (int ii = 0; ii < N; ii++)
-  for (int jj = 0; jj < N; jj++)
-	x[ii][jj] = ii + jj + 3;
-  }
-
-  for (int i = 0; i < N; i++)
-for (int j = 0; j < N; j++)
-  if (x[i][j] != i + j + 3)
-	abort ();
-
-  return 0;
-}
-
-/* Check that only one loop is analyzed, and that it can be parallelized.  */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized, marked independent" 1 "parloops_oacc_kernels" } } */
-/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
-
-/* Check that the loop has been split off into a function.  */
-/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops_oacc_kernels" } } */
-- 
1.9.1



[gomp4, committed] Remove unused DEF_ATTR_FOR_STRING and DEF_ATTR_TREE_LIST

2015-11-06 Thread Tom de Vries

Hi,

this patch removes some unused DEF_ATTR_FOR_STRING and DEF_ATTR_TREE_LIST.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Remove unused DEF_ATTR_FOR_STRING and DEF_ATTR_TREE_LIST

2015-11-06  Tom de Vries  <t...@codesourcery.com>

	* builtin-attrs.def (DOT_DOT_DOT_r_r_r, DOT_DOT_DOT_DOT_r_r_r): Remove
	unused DEF_ATTR_FOR_STRING.
	(ATTR_FNSPEC_DOT_DOT_DOT_r_r_r_NOTHROW_LIST)
	(ATTR_FNSPEC_DOT_DOT_DOT_DOT_r_r_r_NOTHROW_LIST): Remove unused
	DEF_ATTR_TREE_LIST.
---
 gcc/builtin-attrs.def | 8 
 1 file changed, 8 deletions(-)

diff --git a/gcc/builtin-attrs.def b/gcc/builtin-attrs.def
index da3ae80..1338644 100644
--- a/gcc/builtin-attrs.def
+++ b/gcc/builtin-attrs.def
@@ -64,8 +64,6 @@ DEF_ATTR_FOR_INT (6)
   DEF_ATTR_TREE_LIST (ATTR_LIST_##ENUM, ATTR_NULL,	\
 		  ATTR_##ENUM, ATTR_NULL)
 DEF_ATTR_FOR_STRING (STR1, "1")
-DEF_ATTR_FOR_STRING (DOT_DOT_DOT_r_r_r, "...rrr")
-DEF_ATTR_FOR_STRING (DOT_DOT_DOT_DOT_r_r_r, "rrr")
 #undef DEF_ATTR_FOR_STRING
 
 /* Construct a tree for a list of two integers.  */
@@ -129,12 +127,6 @@ DEF_ATTR_TREE_LIST (ATTR_PURE_NOTHROW_LIST, ATTR_PURE,		\
 			ATTR_NULL, ATTR_NOTHROW_LIST)
 DEF_ATTR_TREE_LIST (ATTR_PURE_NOTHROW_LEAF_LIST, ATTR_PURE,	\
 			ATTR_NULL, ATTR_NOTHROW_LEAF_LIST)
-DEF_ATTR_TREE_LIST (ATTR_FNSPEC_DOT_DOT_DOT_r_r_r_NOTHROW_LIST, \
-		ATTR_FNSPEC, ATTR_LIST_DOT_DOT_DOT_r_r_r, \
-		ATTR_NOTHROW_LIST)
-DEF_ATTR_TREE_LIST (ATTR_FNSPEC_DOT_DOT_DOT_DOT_r_r_r_NOTHROW_LIST, \
-		ATTR_FNSPEC, ATTR_LIST_DOT_DOT_DOT_DOT_r_r_r, \
-		ATTR_NOTHROW_LIST)
 DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHROW_LIST, ATTR_NORETURN,	\
 			ATTR_NULL, ATTR_NOTHROW_LIST)
 DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHROW_LEAF_LIST, ATTR_NORETURN,\
-- 
1.9.1



[gomp4, committed] Revert "Add IFN_GOACC_DATA_END_WITH_ARG"

2015-11-06 Thread Tom de Vries

Hi,

I've reverted the patch that added IFN_GOACC_DATA_END_WITH_ARG ( 
https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02661.html ).


The patch attempted to fix a test failure, while at the same time 
keeping the GOACC_data_start fnspec attributes to prevent it from 
becoming an alias analysis optimization barrier.


Now that we've got -foffload-alias, we're no longer concerned about 
GOACC builtins being alias analysis optimization barriers, so the 
IFN_GOACC_DATA_END_WITH_ARG patch has become obsolete.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Revert "Add IFN_GOACC_DATA_END_WITH_ARG"

2015-10-05  Tom de Vries  <t...@codesourcery.com>

	revert:
	2015-05-28  Tom de Vries  <t...@codesourcery.com>

	PR tree-optimization/65419
	* cfgexpand.c (pass_data_expand): Add PROP_gimple_lompifn to
	properties_required field.
	* gimplify.c (gimplify_omp_workshare): Use IFN_GOACC_DATA_END_WITH_ARG
	instead of BUILT_IN_GOACC_DATA_END.  Clear PROP_gimple_lompifn in
	curr_properties.
	(gimplify_function_tree): Tentatively set PROP_gimple_lompifn in
	curr_properties.
	* internal-fn.c (expand_GOACC_DATA_END_WITH_ARG): New dummy function.
	* internal-fn.def (GOACC_DATA_END_WITH_ARG): New DEF_INTERNAL_FN.
	* omp-low.c (lower_omp_target): Set argument of GOACC_DATA_END_WITH_ARG.
	(pass_data_late_lower_omp): New pass_data.
	(pass_late_lower_omp): New pass.
	(pass_late_lower_omp::gate, pass_late_lower_omp::execute)
	(make_pass_late_lower_omp): New function.
	* passes.def: Add pass_late_lower_omp.
	* tree-inline.c (expand_call_inline): Handle PROP_gimple_lompifn.
	* tree-pass.h (PROP_gimple_lompifn): Add define.

	* testsuite/libgomp.oacc-c-c++-common/goacc-data-end.c: New test.
---
 gcc/cfgexpand.c|  3 +-
 gcc/gimplify.c | 25 ++-
 gcc/internal-fn.c  |  9 ---
 gcc/internal-fn.def|  1 -
 gcc/omp-low.c  | 86 +-
 gcc/passes.def |  1 -
 gcc/tree-inline.c  | 16 ++--
 gcc/tree-pass.h|  2 -
 .../libgomp.oacc-c-c++-common/goacc-data-end.c | 67 -
 9 files changed, 14 insertions(+), 196 deletions(-)
 delete mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/goacc-data-end.c

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index ca52d3d..bfbc958 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -6060,8 +6060,7 @@ const pass_data pass_data_expand =
   ( PROP_ssa | PROP_gimple_leh | PROP_cfg
 | PROP_gimple_lcx
 | PROP_gimple_lvec
-| PROP_gimple_lva
-| PROP_gimple_lompifn), /* properties_required */
+| PROP_gimple_lva), /* properties_required */
   PROP_rtl, /* properties_provided */
   ( PROP_ssa | PROP_trees ), /* properties_destroyed */
   0, /* todo_flags_start */
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 6283f0c..a5e28b4 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -8960,32 +8960,20 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
 	pop_gimplify_context (NULL);
   if (ort == ORT_TARGET_DATA)
 	{
+	  enum built_in_function end_ix;
 	  switch (TREE_CODE (expr))
 	{
 	case OACC_DATA:
-	  /* Rather than building a call to BUILT_IN_GOACC_DATA_END, we use
-		 this ifn which is similar, but has a pointer argument, which
-		 will be later set to the &.omp_data_arr of the corresponding
-		 BUILT_IN_GOACC_DATA_START.
-		 This allows us to pretend that the &.omp_data_arr argument of
-		 BUILT_IN_GOACC_DATA_START does not escape.  */
-	  g = gimple_build_call_internal (IFN_GOACC_DATA_END_WITH_ARG, 1,
-	  null_pointer_node);
-	  /* Clear the tentatively set PROP_gimple_lompifn, to indicate that
-		 IFN_GOACC_DATA_END_WITH_ARG needs to be expanded.  The argument
-		 is not abi-compatible with the GOACC_data_end function, which
-		 has no arguments.  */
-	  cfun->curr_properties &= ~PROP_gimple_lompifn;
+	  end_ix = BUILT_IN_GOACC_DATA_END;
 	  break;
 	case OMP_TARGET_DATA:
-	  {
-		tree fn = builtin_decl_explicit (BUILT_IN_GOMP_TARGET_END_DATA);
-		g = gimple_build_call (fn, 0);
-	  }
+	  end_ix = BUILT_IN_GOMP_TARGET_END_DATA;
 	  break;
 	default:
 	  gcc_unreachable ();
 	}
+	  tree fn = builtin_decl_explicit (end_ix);
+	  g = gimple_build_call (fn, 0);
 	  gimple_seq cleanup = NULL;
 	  gimple_seq_add_stmt (&cleanup, g);
 	  g = gimple_build_try (body, cleanup, GIMPLE_TRY_FINALLY);
@@ -10939,9 +10927,6 @@ gimplify_function_tree (tree fndecl)
  if necessary.  */
   cfun->curr_properties |= PROP_gimple_lva;
 
-  /* Tentatively set PROP_gimple_lompifn.  */
-  cfun->curr_properties |= PROP_gimple_lompifn;
-
   for (parm = DECL_ARGUMENTS (fndecl); parm ; parm = DECL_CHAIN (parm))
 {
   /* Preliminarily mark non-addressed complex variables as eligible
diff --

Re: [gomp4, committed] Implement -foffload-alias

2015-11-03 Thread Tom de Vries

On 03/11/15 15:19, Tom de Vries wrote:

I've dropped the two testcases from this patch, I'll commit in a
follow-up patch.


Committed to gomp-4_0-branch, as attached.

Thanks,
- Tom
Add goacc/kernels-loop-offload-alias-{none,ptr}.c

2015-11-03  Tom de Vries  <t...@codesourcery.com>

	* c-c++-common/goacc/kernels-loop-offload-alias-none.c: New test.
	* c-c++-common/goacc/kernels-loop-offload-alias-ptr.c: New test.
---
 .../goacc/kernels-loop-offload-alias-none.c| 61 ++
 .../goacc/kernels-loop-offload-alias-ptr.c | 44 
 2 files changed, 105 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
 create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
new file mode 100644
index 000..bb96330
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
@@ -0,0 +1,61 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+/* { dg-additional-options "-fdump-tree-alias-all" } */
+/* { dg-additional-options "-foffload-alias=none" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+static void
+foo (unsigned int *a, unsigned int *b, unsigned int *c)
+{
+  for (COUNTERTYPE i = 0; i < N; i++)
+a[i] = i * 2;
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+for (COUNTERTYPE ii = 0; ii < N; ii++)
+  c[ii] = a[ii] + b[ii];
+  }
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+if (c[i] != a[i] + b[i])
+  abort ();
+}
+
+int
+main (void)
+{
+  unsigned int *a;
+  unsigned int *b;
+  unsigned int *c;
+
+  a = (unsigned int *)malloc (N * sizeof (unsigned int));
+  b = (unsigned int *)malloc (N * sizeof (unsigned int));
+  c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+  foo (a, b, c);
+
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 5" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 6" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 7" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 9 "alias" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c
new file mode 100644
index 000..de4f45a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c
@@ -0,0 +1,44 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+/* { dg-additional-options "-fdump-tree-alias-all" } */
+/* { dg-additional-options "-foffload-alias=pointer" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+
+int
+main (void)
+{
+  for (COUNTERTYPE i = 0; i < N; i++)
+a[i] = i * 2;
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+for (COUNTERTYPE ii = 0; ii < N; ii++)
+  c[ii] = a[ii] + b[ii];
+  }
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+if (c[i] != a[i] + b[i])
+  abort ();
+
+  return 0;
+}
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 6 "alias" } } */
-- 
1.9.1



Re: [PATCH, 2/2] Handle recursive restrict in function parameter

2015-11-03 Thread Tom de Vries

On 01/11/15 19:20, Tom de Vries wrote:

On 01/11/15 19:03, Tom de Vries wrote:

So, the new patch series is:

  1Rename make_restrict_var_constraints to make_param_constraints
  2Handle recursive restrict in function parameter

I'll repost in reply to this message.



This patch adds handling of all the restrict qualifiers in the type of a
function parameter.



And reposting an updated version, now that the toplevel parameter in 
make_param_constraints has been eliminated.


Thanks,
- Tom

Handle recursive restrict in function parameter

	* tree-ssa-structalias.c (struct fieldoff): Add restrict_var field.
	(push_fields_onto_fieldstack): Add and handle handle_param parameter.
	(create_variable_info_for_1): Add and handle
	handle_param parameter.  Add extra arg to call to
	push_fields_onto_fieldstack.  Handle restrict pointer fields.
	(create_variable_info_for): Call create_variable_info_for_1 with extra
	arg.
	(make_param_constraints): Drop restrict_name parameter.  Ignore
	vi->only_restrict_pointers.
	(intra_create_variable_infos): Call create_variable_info_for_1 with
	extra arg.  Remove restrict handling.  Call make_param_constraints with
	one less arg.

	* gcc.dg/tree-ssa/restrict-7.c: New test.
	* gcc.dg/tree-ssa/restrict-8.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c | 12 
 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c | 17 ++
 gcc/tree-ssa-structalias.c | 90 ++
 3 files changed, 83 insertions(+), 36 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
new file mode 100644
index 000..f7a68c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+int
+f (int *__restrict__ *__restrict__ *__restrict__ a, int *b)
+{
+  *b = 1;
+  ***a  = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
new file mode 100644
index 000..b0ab164
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+struct s
+{
+  int *__restrict__ *__restrict__ pp;
+};
+
+int
+f (struct s s, int *b)
+{
+  *b = 1;
+  **s.pp = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index ded5a1e..3c65db8 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -307,6 +307,7 @@ static varinfo_t first_or_preceding_vi_for_offset (varinfo_t,
 		   unsigned HOST_WIDE_INT);
 static varinfo_t lookup_vi_for_tree (tree);
 static inline bool type_can_have_subvars (const_tree);
+static void make_param_constraints (varinfo_t);
 
 /* Pool of variable info structures.  */
 static object_allocator variable_info_pool
@@ -393,6 +394,7 @@ new_var_info (tree t, const char *name, bool add_id)
   return ret;
 }
 
+static varinfo_t create_variable_info_for_1 (tree, const char *, bool, bool);
 
 /* A map mapping call statements to per-stmt variables for uses
and clobbers specific to the call.  */
@@ -5195,6 +5197,8 @@ struct fieldoff
   unsigned may_have_pointers : 1;
 
   unsigned only_restrict_pointers : 1;
+
+  varinfo_t restrict_var;
 };
 typedef struct fieldoff fieldoff_s;
 
@@ -5289,11 +5293,12 @@ field_must_have_pointers (tree t)
OFFSET is used to keep track of the offset in this entire
structure, rather than just the immediately containing structure.
Returns false if the caller is supposed to handle the field we
-   recursed for.  */
+   recursed for.  If HANDLE_PARAM is set, we're handling part of a function
+   parameter.  */
 
 static bool
 push_fields_onto_fieldstack (tree type, vec *fieldstack,
-			 HOST_WIDE_INT offset)
+			 HOST_WIDE_INT offset, bool handle_param)
 {
   tree field;
   bool empty_p = true;
@@ -5319,7 +5324,7 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 	|| TREE_CODE (field_type) == UNION_TYPE)
 	  push = true;
 	else if (!push_fields_onto_fieldstack
-		(field_type, fieldstack, offset + foff)
+		(field_type, fieldstack, offset + foff, handle_param)
 		 && (DECL_SIZE (field)
 		 && !integer_zerop (DECL_SIZE (field
 	  /* Empty structures may have actual size, like in C++.  So
@@ -5340,7 +5345,8 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 	if (!pair
 		&& offset + foff != 0)
 	  {
-		fieldoff_s e = {0, offset + foff, false, false, false, false};
+		fieldoff_s e = {0, offset + foff, false, false, false, false,
+NULL};
 		pair = fieldstack-&g

[gomp4,committed] Handle recursive restrict in function parameter

2015-11-03 Thread Tom de Vries

[ was: Re: [PATCH, 2/2] Handle recursive restrict in function parameter ]

On 03/11/15 14:46, Tom de Vries wrote:

On 01/11/15 19:20, Tom de Vries wrote:

On 01/11/15 19:03, Tom de Vries wrote:

So, the new patch series is:

  1  Rename make_restrict_var_constraints to
     make_param_constraints
  2  Handle recursive restrict in function parameter

I'll repost in reply to this message.



This patch adds handling of all the restrict qualifiers in the type of a
function parameter.



And reposting an updated version, now that the toplevel parameter in
make_param_constraints has been eliminated.



And committed to gomp-4_0-branch.

Thanks,
- Tom


0001-Handle-recursive-restrict-in-function-parameter.patch


Handle recursive restrict in function parameter

* tree-ssa-structalias.c (struct fieldoff): Add restrict_var field.
(push_fields_onto_fieldstack): Add and handle handle_param parameter.
(create_variable_info_for_1): Add and handle
handle_param parameter.  Add extra arg to call to
push_fields_onto_fieldstack.  Handle restrict pointer fields.
(create_variable_info_for): Call create_variable_info_for_1 with extra
arg.
(make_param_constraints): Drop restrict_name parameter.  Ignore
vi->only_restrict_pointers.
(intra_create_variable_infos): Call create_variable_info_for_1 with
extra arg.  Remove restrict handling.  Call make_param_constraints with
one less arg.

* gcc.dg/tree-ssa/restrict-7.c: New test.
* gcc.dg/tree-ssa/restrict-8.c: New test.
---
  gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c | 12 
  gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c | 17 ++
  gcc/tree-ssa-structalias.c | 90 ++
  3 files changed, 83 insertions(+), 36 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c 
b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
new file mode 100644
index 000..f7a68c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+int
+f (int *__restrict__ *__restrict__ *__restrict__ a, int *b)
+{
+  *b = 1;
+  ***a  = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c 
b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
new file mode 100644
index 000..b0ab164
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+struct s
+{
+  int *__restrict__ *__restrict__ pp;
+};
+
+int
+f (struct s s, int *b)
+{
+  *b = 1;
+  **s.pp = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index ded5a1e..3c65db8 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -307,6 +307,7 @@ static varinfo_t first_or_preceding_vi_for_offset 
(varinfo_t,
   unsigned HOST_WIDE_INT);
  static varinfo_t lookup_vi_for_tree (tree);
  static inline bool type_can_have_subvars (const_tree);
+static void make_param_constraints (varinfo_t);

  /* Pool of variable info structures.  */
  static object_allocator<variable_info> variable_info_pool
@@ -393,6 +394,7 @@ new_var_info (tree t, const char *name, bool add_id)
return ret;
  }

+static varinfo_t create_variable_info_for_1 (tree, const char *, bool, bool);

  /* A map mapping call statements to per-stmt variables for uses
 and clobbers specific to the call.  */
@@ -5195,6 +5197,8 @@ struct fieldoff
unsigned may_have_pointers : 1;

unsigned only_restrict_pointers : 1;
+
+  varinfo_t restrict_var;
  };
  typedef struct fieldoff fieldoff_s;

@@ -5289,11 +5293,12 @@ field_must_have_pointers (tree t)
 OFFSET is used to keep track of the offset in this entire
 structure, rather than just the immediately containing structure.
 Returns false if the caller is supposed to handle the field we
-   recursed for.  */
+   recursed for.  If HANDLE_PARAM is set, we're handling part of a function
+   parameter.  */

  static bool
  push_fields_onto_fieldstack (tree type, vec<fieldoff_s> *fieldstack,
-HOST_WIDE_INT offset)
+HOST_WIDE_INT offset, bool handle_param)
  {
tree field;
bool empty_p = true;
@@ -5319,7 +5324,7 @@ push_fields_onto_fieldstack (tree type, vec 
*fieldstack,
|| TREE_CODE (field_type) == UNION_TYPE)
  push = true;
else if (!push_fields_onto_fieldstack
-   (field_type, fieldstack, offset + foff)
+   (field_type, fieldstack, off

[gomp4, committed] Implement -foffload-alias

2015-11-03 Thread Tom de Vries

[ was: Re: [gomp4, WIP] Implement -foffload-alias ]

On 28/09/15 17:38, Tom de Vries wrote:

Hi,

this work-in-progress patch implements a new option
-foffload-alias=<none|pointer|all>.


The option -foffload-alias=none instructs the compiler to assume that
object references and pointer dereferences in an offload region do not
alias.

The option -foffload-alias=pointer instructs the compiler to assume that
object references in an offload region do not alias.

The option -foffload-alias=all instructs the compiler to make no
assumptions about aliasing in offload regions.

The default value is -foffload-alias=none.


The patch works by adding restrict to the types of the fields used to
pass data to an offloading region.



Updated patch attached, committed to gomp-4_0-branch.


Atm, the kernels-loop-offload-alias-ptr.c test-case passes, but the
kernels-loop-offload-alias-none.c test-case fails.


I've dropped the two testcases from this patch, I'll commit in a 
follow-up patch.



For the latter, the
required amount of restrict is added, but it has no effect. I've
reported this in a more basic form in PR67742: "3rd-level restrict
ignored".


I've committed a fix for that PR as reported here: 
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00204.html .


Furthermore, I've added support for the option in the 'mask & 4' case in 
install_var_field, I ran into this when trying out some Fortran test-cases.


Thanks,
- Tom

Implement -foffload-alias

2015-09-28  Tom de Vries  <t...@codesourcery.com>

	* common.opt (foffload-alias): New option.
	* flag-types.h (enum offload_alias): New enum.
	* omp-low.c (install_var_field): Handle flag_offload_alias.
	* doc/invoke.texi (@item Code Generation Options): Add -foffload-alias.
	(@item -foffload-alias): New item.
---
 gcc/common.opt  | 16 
 gcc/doc/invoke.texi | 11 +++
 gcc/flag-types.h|  7 +++
 gcc/omp-low.c   | 28 ++--
 4 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index c85ab49..135e777 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1738,6 +1738,22 @@ Enum(offload_abi) String(ilp32) Value(OFFLOAD_ABI_ILP32)
 EnumValue
 Enum(offload_abi) String(lp64) Value(OFFLOAD_ABI_LP64)
 
+foffload-alias=
+Common Joined RejectNegative Enum(offload_alias) Var(flag_offload_alias) Init(OFFLOAD_ALIAS_NONE)
+-foffload-alias=[all|pointer|none] Assume non-aliasing in an offload region
+
+Enum
+Name(offload_alias) Type(enum offload_alias) UnknownError(unknown offload aliasing %qs)
+
+EnumValue
+Enum(offload_alias) String(all) Value(OFFLOAD_ALIAS_ALL)
+
+EnumValue
+Enum(offload_alias) String(pointer) Value(OFFLOAD_ALIAS_POINTER)
+
+EnumValue
+Enum(offload_alias) String(none) Value(OFFLOAD_ALIAS_NONE)
+
 fomit-frame-pointer
 Common Report Var(flag_omit_frame_pointer) Optimization
 When possible do not generate stack frames.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 5a07512..8967f88 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1142,6 +1142,7 @@ See S/390 and zSeries Options.
 -finstrument-functions-exclude-function-list=@var{sym},@var{sym},@dots{} @gol
 -finstrument-functions-exclude-file-list=@var{file},@var{file},@dots{} @gol
 -fno-common  -fno-ident @gol
+-foffload-alias=@r{[}none@r{|}pointer@r{|}all@r{]} @gol
 -fpcc-struct-return  -fpic  -fPIC -fpie -fPIE -fno-plt @gol
 -fno-jump-tables @gol
 -frecord-gcc-switches @gol
@@ -23842,6 +23843,16 @@ The options @option{-ftrapv} and @option{-fwrapv} override each other, so using
 using @option{-ftrapv} @option{-fwrapv} @option{-fno-wrapv} on the command-line
 results in @option{-ftrapv} being effective.
 
+@item -foffload-alias=@r{[}none@r{|}pointer@r{|}all@r{]}
+@opindex -foffload-alias
+The option @option{-foffload-alias=none} instructs the compiler to assume that
+object references and pointer dereferences in an offload region do not alias.
+The option @option{-foffload-alias=pointer} instructs the compiler to assume that
+object references in an offload region do not alias.  The option
+@option{-foffload-alias=all} instructs the compiler to make no assumptions about
+aliasing in offload regions.  The default value is
+@option{-foffload-alias=none}.
+
 @item -fexceptions
 @opindex fexceptions
 Enable exception handling.  Generates extra code needed to propagate
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 6301cea..87b1677 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -293,5 +293,12 @@ enum gfc_convert
   GFC_FLAG_CONVERT_LITTLE
 };
 
+enum offload_alias
+{
+  OFFLOAD_ALIAS_ALL,
+  OFFLOAD_ALIAS_POINTER,
+  OFFLOAD_ALIAS_NONE
+};
+
 
 #endif /* ! GCC_FLAG_TYPES_H */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 3543785..6bac074 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -1441,6 +1441,14 @@ install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
   tree field, type, sfield = NULL_TREE;
   splay_tree_key 

Re: [PATCH, 2/2] Handle recursive restrict in function parameter

2015-11-03 Thread Tom de Vries

On 03/11/15 16:08, Richard Biener wrote:

On Tue, 3 Nov 2015, Tom de Vries wrote:


On 01/11/15 19:20, Tom de Vries wrote:

On 01/11/15 19:03, Tom de Vries wrote:

So, the new patch series is:

   1  Rename make_restrict_var_constraints to make_param_constraints
   2  Handle recursive restrict in function parameter

I'll repost in reply to this message.



This patch adds handling of all the restrict qualifiers in the type of a
function parameter.



And reposting an updated version, now that the toplevel parameter in
make_param_constraints has been eliminated.


@@ -5195,6 +5197,8 @@ struct fieldoff
unsigned may_have_pointers : 1;

unsigned only_restrict_pointers : 1;
+
+  varinfo_t restrict_var;
  };

store the varinfo ID here, 'unsigned int restrict_var' which ends
up not changing fieldoff size.  get_varinfo (restrict_var) will get
you the varinfo_t.


Done, attached.



@@ -5374,6 +5380,19 @@ push_fields_onto_fieldstack (tree type,
vec *fieldstack,
   = (!has_unknown_size
  && POINTER_TYPE_P (field_type)
  && TYPE_RESTRICT (field_type));
+   if (handle_param
+   && e.only_restrict_pointers
+   && !type_contains_placeholder_p (TREE_TYPE
(field_type)))
+ {
+   varinfo_t rvi;
+   tree heapvar = build_fake_var_decl (TREE_TYPE
(field_type));
+   DECL_EXTERNAL (heapvar) = 1;
+   rvi = create_variable_info_for_1 (heapvar,
"PARM_NOALIAS",
+ true, true);
+   rvi->is_restrict_var = 1;
+   insert_vi_for_tree (heapvar, rvi);
+   e.restrict_var = rvi;
+ }

hmm, can you delay this to the point we actually will use field-sensitive
stuff?  That is, until create_variable_info_for_1 decided to use a
multi-field variable?


AFAIU your concern is that in the current patch we're creating heapvars 
that may end up being ignored, f.i. if we hit the 
MAX_FIELDS_FOR_FIELD_SENSITIVE threshold?



 Say, here:

+  if (handle_param
+ && newvi->only_restrict_pointers
+ && fo->restrict_var != NULL)
+   {
+ make_constraint_from (newvi, fo->restrict_var->id);
+ make_param_constraints (fo->restrict_var);
+   }

?  Looks like then you don't need the new field at all.



The build_fake_var_decl call needs TREE_TYPE (field_type), the type the 
restrict pointer field points to.


The field type is no longer available once we've abstracted the struct 
type into a field stack in create_variable_info_for_1.


I think I can postpone the creation of the heapvar till where you 
suggest in create_variable_info_for_1, but I'd still need a means
to communicate the TREE_TYPE (field_type) from 
push_fields_onto_fieldstack to create_variable_info_for_1.


A simple implementation would be a new field:
...
@@ -5195,6 +5197,8 @@ struct fieldoff
unsigned may_have_pointers : 1;

unsigned only_restrict_pointers : 1;
+
+ tree restrict_pointed_type;
};
...
Which AFAIU will change fieldoff size.

Thanks,
- Tom
Handle recursive restrict in function parameter

	* tree-ssa-structalias.c (struct fieldoff): Add restrict_var field.
	(push_fields_onto_fieldstack): Add and handle handle_param parameter.
	(create_variable_info_for_1): Add and handle
	handle_param parameter.  Add extra arg to call to
	push_fields_onto_fieldstack.  Handle restrict pointer fields.
	(create_variable_info_for): Call create_variable_info_for_1 with extra
	arg.
	(make_param_constraints): Drop restrict_name parameter.  Ignore
	vi->only_restrict_pointers.
	(intra_create_variable_infos): Call create_variable_info_for_1 with
	extra arg.  Remove restrict handling.  Call make_param_constraints with
	one less arg.

	* gcc.dg/tree-ssa/restrict-7.c: New test.
	* gcc.dg/tree-ssa/restrict-8.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c | 12 
 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c | 17 ++
 gcc/tree-ssa-structalias.c | 91 ++
 3 files changed, 84 insertions(+), 36 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
new file mode 100644
index 000..f7a68c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+int
+f (int *__restrict__ *__restrict__ *__restrict__ a, int *b)
+{
+  *b = 1;
+  ***a  = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
new f

[gomp4, committed] Backported param parloops-schedule

2015-10-14 Thread Tom de Vries
[ was: Re: [PATCH, 3/5] Handle original loop tree in 
expand_omp_for_generic ]


On 13/10/15 23:48, Thomas Schwinge wrote:

Hi Tom!

On Mon, 12 Oct 2015 18:56:29 +0200, Tom de Vries<tom_devr...@mentor.com>  wrote:

>Handle original loop tree in expand_omp_for_generic
>
>2015-09-12  Tom de Vries<t...@codesourcery.com>
>
>PR tree-optimization/67476
>* omp-low.c (expand_omp_for_generic): Handle original loop tree.

Working on a merge from trunk into gomp-4_0-branch, I'm seeing your
change (trunk r228754) conflict with code Chung-Lin changed
(gomp-4_0-branch r224505).  So, would you two please cherry-pick/merge
trunk r228754 into gomp-4_0-branch?  Thanks!  (I'm assuming you can
easily tell what needs to be done here; it's been a long time that
Chung-Lin touched this code, so CCing him just in case.)  Thanks!


Hi Thomas,

I've backported the whole patch series:
 1  Handle simple latch in expand_omp_for_generic
 2  Add missing phis in expand_omp_for_generic
 3  Handle original loop tree in expand_omp_for_generic
 4  Support DEFPARAMENUM in params.def
 5  Add param parloops-schedule
and committed to gomp-4_0-branch.

I'm only posting patch nr. 3, the only one with a non-trivial conflict.

Thanks,
- Tom
Handle original loop tree in expand_omp_for_generic

2015-10-14  Tom de Vries  <t...@codesourcery.com>

	backport from trunk:
	2015-10-13  Tom de Vries  <t...@codesourcery.com>

	PR tree-optimization/67476
	* omp-low.c (expand_omp_for_generic): Handle original loop tree.
---
 gcc/omp-low.c | 38 +-
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 473e2e7..dde3e1b 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -6924,7 +6924,6 @@ expand_omp_for_generic (struct omp_region *region,
   remove_edge (e);
 
   make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
-  add_bb_to_loop (l2_bb, cont_bb->loop_father);
   e = find_edge (cont_bb, l1_bb);
   if (e == NULL)
 	{
@@ -7002,23 +7001,36 @@ expand_omp_for_generic (struct omp_region *region,
   set_immediate_dominator (CDI_DOMINATORS, l1_bb,
 			   recompute_dominator (CDI_DOMINATORS, l1_bb));
 
-  struct loop *outer_loop;
-  if (seq_loop)
-	outer_loop = l0_bb->loop_father;
-  else
+  /* We enter expand_omp_for_generic with a loop.  This original loop may
+	 have its own loop struct, or it may be part of an outer loop struct
+	 (which may be the fake loop).  */
+  struct loop *outer_loop = entry_bb->loop_father;
+  bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
+
+  add_bb_to_loop (l2_bb, outer_loop);
+
+  struct loop *new_loop = NULL;
+  if (!seq_loop)
 	{
-	  outer_loop = alloc_loop ();
-	  outer_loop->header = l0_bb;
-	  outer_loop->latch = l2_bb;
-	  add_loop (outer_loop, l0_bb->loop_father);
+	  /* We've added a new loop around the original loop.  Allocate the
+	 corresponding loop struct.  */
+	  new_loop = alloc_loop ();
+	  new_loop->header = l0_bb;
+	  new_loop->latch = l2_bb;
+	  add_loop (new_loop, outer_loop);
 	}
 
-  if (!gimple_omp_for_combined_p (fd->for_stmt))
+  /* Allocate a loop structure for the original loop unless we already
+	 had one.  */
+  if (!orig_loop_has_loop_struct
+	  && !gimple_omp_for_combined_p (fd->for_stmt))
 	{
-	  struct loop *loop = alloc_loop ();
-	  loop->header = l1_bb;
+	  struct loop *orig_loop = alloc_loop ();
+	  orig_loop->header = l1_bb;
 	  /* The loop may have multiple latches.  */
-	  add_loop (loop, outer_loop);
+	  add_loop (orig_loop, (new_loop != NULL
+? new_loop
+: outer_loop));
 	}
 }
 }
-- 
1.9.1



[RFC] Add OPTGROUP_PAR

2015-10-19 Thread Tom de Vries

Hi,

this patch adds OPTGROUP_PAR.

It allows a user to see on stderr what loops are parallelized by 
pass_parallelize_loops, using -fopt-info-par:

...
$ gcc -O2 -fopt-info-par test.c -ftree-parallelize-loops=32
test.c:5:3: note: parallelized inner loop
...

This patch doesn't include any MSG_MISSED_OPTIMIZATION/MSG_NOTE messages 
yet.


Idea of the patch OK?

Any other comments?

Thanks,
- Tom
Add OPTGROUP_PAR

2015-10-19  Tom de Vries  <t...@codesourcery.com>

	* doc/invoke.texi (@item -fopt-info): Add @item par in group of
	optimizations table.
	* dumpfile.c (optgroup_options): Add OPTGROUP_PAR entry.
	* dumpfile.h (OPTGROUP_PAR): New define.
	(OPTGROUP_OTHER): Renumber.
	(OPTGROUP_ALL): Add OPTGROUP_PAR.
	* tree-parloops.c (parallelize_loops): Handle -fopt-info-par.
	(pass_data_parallelize_loops): Change optinfo_flags from OPTGROUP_LOOP
	to OPTGROUP_PAR.
---
 gcc/doc/invoke.texi |  2 ++
 gcc/dumpfile.c  |  1 +
 gcc/dumpfile.h  |  5 +++--
 gcc/tree-parloops.c | 16 ++--
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 54e9f12..629ee37 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -7319,6 +7319,8 @@ Enable dumps from all loop optimizations.
 Enable dumps from all inlining optimizations.
 @item vec
 Enable dumps from all vectorization optimizations.
+@item par
+Enable dumps from all auto-parallelization optimizations.
 @item optall
 Enable dumps from all optimizations. This is a superset of
 the optimization groups listed above.
diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c
index e4c4748..421d19b 100644
--- a/gcc/dumpfile.c
+++ b/gcc/dumpfile.c
@@ -138,6 +138,7 @@ static const struct dump_option_value_info optgroup_options[] =
   {"loop", OPTGROUP_LOOP},
   {"inline", OPTGROUP_INLINE},
   {"vec", OPTGROUP_VEC},
+  {"par", OPTGROUP_PAR},
   {"optall", OPTGROUP_ALL},
   {NULL, 0}
 };
diff --git a/gcc/dumpfile.h b/gcc/dumpfile.h
index 5f30077..52371f4 100644
--- a/gcc/dumpfile.h
+++ b/gcc/dumpfile.h
@@ -97,9 +97,10 @@ enum tree_dump_index
 #define OPTGROUP_LOOP(1 << 2)   /* Loop optimization passes */
 #define OPTGROUP_INLINE  (1 << 3)   /* Inlining passes */
 #define OPTGROUP_VEC (1 << 4)   /* Vectorization passes */
-#define OPTGROUP_OTHER   (1 << 5)   /* All other passes */
+#define OPTGROUP_PAR	 (1 << 5)   /* Auto-parallelization passes */
+#define OPTGROUP_OTHER   (1 << 6)   /* All other passes */
 #define OPTGROUP_ALL	 (OPTGROUP_IPA | OPTGROUP_LOOP | OPTGROUP_INLINE \
-  | OPTGROUP_VEC | OPTGROUP_OTHER)
+			  | OPTGROUP_VEC | OPTGROUP_PAR | OPTGROUP_OTHER)
 
 /* Define a tree dump switch.  */
 struct dump_file_info
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index c7aa62c..e98c2c7 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2718,17 +2718,21 @@ parallelize_loops (void)
 
   changed = true;
   skip_loop = loop->inner;
+  const char *loop_describe = (loop->inner
+   ? "outer"
+   : "inner");
+  loop_loc = find_loop_location (loop);
   if (dump_file && (dump_flags & TDF_DETAILS))
   {
-	if (loop->inner)
-	  fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
-	else
-	  fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
-	loop_loc = find_loop_location (loop);
+	fprintf (dump_file, "parallelizing %s loop %d\n", loop_describe,
+		 loop->header->index);
 	if (loop_loc != UNKNOWN_LOCATION)
 	  fprintf (dump_file, "\nloop at %s:%d: ",
 		   LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
   }
+  if (dump_enabled_p ())
+	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
+			 "parallelized %s loop\n", loop_describe);
   gen_parallel_loop (loop, &reduction_list,
 			 n_threads, &niter_desc);
 }
@@ -2752,7 +2756,7 @@ const pass_data pass_data_parallelize_loops =
 {
   GIMPLE_PASS, /* type */
   "parloops", /* name */
-  OPTGROUP_LOOP, /* optinfo_flags */
+  OPTGROUP_PAR, /* optinfo_flags */
   TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
   ( PROP_cfg | PROP_ssa ), /* properties_required */
   0, /* properties_provided */
-- 
1.9.1



<    4   5   6   7   8   9   10   11   12   13   >