On 09/11/15 16:35, Tom de Vries wrote:
Hi,
this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.
The patch series contains these patches:
1 Insert new exit block only when needed in
transform_to_exit_first_loop_alt
2 Make create_parallel_loop return void
3 Ignore reduction clause on kernels directive
4 Implement -foffload-alias
5 Add in_oacc_kernels_region in struct loop
6 Add pass_oacc_kernels
7 Add pass_dominator_oacc_kernels
8 Add pass_ch_oacc_kernels
9 Add pass_parallelize_loops_oacc_kernels
10 Add pass_oacc_kernels pass group in passes.def
11 Update testcases after adding kernels pass group
12 Handle acc loop directive
13 Add c-c++-common/goacc/kernels-*.c
14 Add gfortran.dg/goacc/kernels-*.f95
15 Add libgomp.oacc-c-c++-common/kernels-*.c
16 Add libgomp.oacc-fortran/kernels-*.f95
The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.
Bootstrapped and reg-tested on x86_64.
Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).
I'll post the individual patches in reply to this message.
This patch adds the pass_oacc_kernels pass group to the pass list in
passes.def.
Note the repetition of pass_lim/pass_copy_prop. The first pair is for an
inner loop in a loop nest, the second for an outer loop in a loop nest.
Thanks,
- Tom
Add pass_oacc_kernels pass group in passes.def
2015-11-09 Tom de Vries <t...@codesourcery.com>
* omp-low.c (pass_expand_omp_ssa::clone): New function.
* tree-ssa-loop.c (pass_scev_cprop::clone, pass_tree_loop_init::clone)
(pass_tree_loop_done::clone): New functions.
* passes.def: Add pass_oacc_kernels pass group.
---
gcc/omp-low.c | 1 +
gcc/passes.def | 21 +++++++++++++++++++++
gcc/tree-ssa-loop.c | 3 +++
3 files changed, 25 insertions(+)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 13fa456..1283cc7 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -13360,6 +13360,7 @@ public:
return !(fun->curr_properties & PROP_gimple_eomp);
}
virtual unsigned int execute (function *) { return execute_expand_omp (); }
+ opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
}; // class pass_expand_omp_ssa
diff --git a/gcc/passes.def b/gcc/passes.def
index c0ab6b9..b7a5424 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -86,6 +86,27 @@ along with GCC; see the file COPYING3. If not see
/* pass_build_ealias is a dummy pass that ensures that we
execute TODO_rebuild_alias at this point. */
NEXT_PASS (pass_build_ealias);
+ /* Pass group that runs when there are oacc kernels in the
+ function. */
+ NEXT_PASS (pass_oacc_kernels);
+ PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
+ NEXT_PASS (pass_dominator_oacc_kernels);
+ NEXT_PASS (pass_ch_oacc_kernels);
+ NEXT_PASS (pass_dominator_oacc_kernels);
+ NEXT_PASS (pass_tree_loop_init);
+ NEXT_PASS (pass_lim);
+ NEXT_PASS (pass_copy_prop);
+ NEXT_PASS (pass_lim);
+ NEXT_PASS (pass_copy_prop);
+ NEXT_PASS (pass_scev_cprop);
+ NEXT_PASS (pass_tree_loop_done);
+ NEXT_PASS (pass_dominator_oacc_kernels);
+ NEXT_PASS (pass_dce);
+ NEXT_PASS (pass_tree_loop_init);
+ NEXT_PASS (pass_parallelize_loops_oacc_kernels);
+ NEXT_PASS (pass_expand_omp_ssa);
+ NEXT_PASS (pass_tree_loop_done);
+ POP_INSERT_PASSES ()
NEXT_PASS (pass_fre);
NEXT_PASS (pass_merge_phi);
NEXT_PASS (pass_dse);
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index b51cac2..0557f99 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -270,6 +270,7 @@ public:
/* opt_pass methods: */
virtual unsigned int execute (function *);
+ opt_pass * clone () { return new pass_tree_loop_init (m_ctxt); }
}; // class pass_tree_loop_init
@@ -374,6 +375,7 @@ public:
/* opt_pass methods: */
virtual bool gate (function *) { return flag_tree_scev_cprop; }
virtual unsigned int execute (function *) { return scev_const_prop (); }
+ opt_pass * clone () { return new pass_scev_cprop (m_ctxt); }
}; // class pass_scev_cprop
@@ -516,6 +518,7 @@ public:
/* opt_pass methods: */
virtual unsigned int execute (function *) { return tree_ssa_loop_done (); }
+ opt_pass * clone () { return new pass_tree_loop_done (m_ctxt); }
}; // class pass_tree_loop_done
--
1.9.1