Hi! On Tue, 25 Nov 2014 12:27:34 +0100, Tom de Vries <tom_devr...@mentor.com> wrote: > On 15-11-14 18:21, Tom de Vries wrote: > > On 15-11-14 13:14, Tom de Vries wrote: > >> Hi, > >> > >> I'm submitting a patch series with initial support for the oacc kernels > >> directive. > >> > >> The patch series uses pass_parallelize_loops to implement parallelization > >> of > >> loops in the oacc kernels region. > >> > >> The patch series consists of these 8 patches: > >> ... > >> 1 Expand oacc kernels after pass_build_ealias > >> 2 Add pass_oacc_kernels > >> 3 Add pass_ch_oacc_kernels to pass_oacc_kernels > >> 4 Add pass_tree_loop_{init,done} to pass_oacc_kernels > >> 5 Add pass_loop_im to pass_oacc_kernels > >> 6 Add pass_ccp to pass_oacc_kernels > >> 7 Add pass_parloops_oacc_kernels to pass_oacc_kernels > >> 8 Do simple omp lowering for no address taken var > >> ... > > > > This patch adds a pass_ch_oacc_kernels to the pass group pass_oacc_kernels. > > > > The idea is that pass_parallelize_loops only deals with loops for which the > > header has been copied, so the easiest way to meet that requirement when > > running > > pass_parallelize_loops in group pass_oacc_kernels, is to run pass_ch as a > > part > > of pass_oacc_kernels. > > > > We define a separate pass pass_ch_oacc_kernels, to leave all loops that > > aren't > > part of a kernels region alone. > > > > Updated for moving pass_oacc_kernels down past pass_fre in the pass list. > > Bootstrapped and reg-tested as before. > > OK for trunk?
Committed to gomp-4_0-branch in r222281: commit 58c33a7965c379b55b549d50e3b79b2252bcc876 Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Tue Apr 21 19:48:16 2015 +0000 Add pass_ch_oacc_kernels to pass_oacc_kernels gcc/ * omp-low.c (loop_in_oacc_kernels_region_p): New function. * omp-low.h (loop_in_oacc_kernels_region_p): Declare. * passes.def: Add pass_ch_oacc_kernels to pass group pass_oacc_kernels. * tree-pass.h (make_pass_ch_oacc_kernels): Declare * tree-ssa-loop-ch.c: Include omp-low.h. (pass_ch_execute): Declare. (pass_ch::execute): Factor out ... (pass_ch_execute): ... this new function. If handling oacc kernels, skip loops that are not in oacc kernels region. (pass_ch_oacc_kernels::execute): (pass_data_ch_oacc_kernels): New pass_data. (class pass_ch_oacc_kernels): New pass. (pass_ch_oacc_kernels::execute, make_pass_ch_oacc_kernels): New function. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@222281 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 15 ++++++++ gcc/omp-low.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/omp-low.h | 2 ++ gcc/passes.def | 1 + gcc/tree-pass.h | 1 + gcc/tree-ssa-loop-ch.c | 59 +++++++++++++++++++++++++++++-- 6 files changed, 167 insertions(+), 2 deletions(-) diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp index 8a53ad8..d00c5e0 100644 --- gcc/ChangeLog.gomp +++ gcc/ChangeLog.gomp @@ -1,5 +1,20 @@ 2015-04-21 Tom de Vries <t...@codesourcery.com> + * omp-low.c (loop_in_oacc_kernels_region_p): New function. + * omp-low.h (loop_in_oacc_kernels_region_p): Declare. + * passes.def: Add pass_ch_oacc_kernels to pass group pass_oacc_kernels. + * tree-pass.h (make_pass_ch_oacc_kernels): Declare + * tree-ssa-loop-ch.c: Include omp-low.h. + (pass_ch_execute): Declare. + (pass_ch::execute): Factor out ... + (pass_ch_execute): ... this new function. If handling oacc kernels, + skip loops that are not in oacc kernels region. 
+ (pass_ch_oacc_kernels::execute): + (pass_data_ch_oacc_kernels): New pass_data. + (class pass_ch_oacc_kernels): New pass. + (pass_ch_oacc_kernels::execute, make_pass_ch_oacc_kernels): New + function. + * passes.def: Add pass group pass_oacc_kernels. * tree-pass.h (make_pass_oacc_kernels): Declare. * tree-ssa-loop.c (gate_oacc_kernels): New static function. diff --git gcc/omp-low.c gcc/omp-low.c index 16d9a5e..1b03ae6 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -13920,4 +13920,95 @@ gimple_stmt_omp_data_i_init_p (gimple stmt) SSA_OP_DEF); } +/* Return true if LOOP is inside a kernels region. */ + +bool +loop_in_oacc_kernels_region_p (struct loop *loop, basic_block *region_entry, + basic_block *region_exit) +{ + bitmap excludes_bitmap = BITMAP_GGC_ALLOC (); + bitmap region_bitmap = BITMAP_GGC_ALLOC (); + bitmap_clear (region_bitmap); + + if (region_entry != NULL) + *region_entry = NULL; + if (region_exit != NULL) + *region_exit = NULL; + + basic_block bb; + gimple last; + FOR_EACH_BB_FN (bb, cfun) + { + if (bitmap_bit_p (region_bitmap, bb->index)) + continue; + + last = last_stmt (bb); + if (!last) + continue; + + if (gimple_code (last) != GIMPLE_OMP_TARGET + || (gimple_omp_target_kind (last) != GF_OMP_TARGET_KIND_OACC_KERNELS)) + continue; + + bitmap_clear (excludes_bitmap); + bitmap_set_bit (excludes_bitmap, bb->index); + + vec<basic_block> dominated + = get_all_dominated_blocks (CDI_DOMINATORS, bb); + + unsigned di; + basic_block dom; + + basic_block end_region = NULL; + FOR_EACH_VEC_ELT (dominated, di, dom) + { + if (dom == bb) + continue; + + last = last_stmt (dom); + if (!last) + continue; + + if (gimple_code (last) != GIMPLE_OMP_RETURN) + continue; + + if (end_region == NULL + || dominated_by_p (CDI_DOMINATORS, end_region, dom)) + end_region = dom; + } + + if (end_region == NULL) + { + gimple kernels = last_stmt (bb); + fatal_error (gimple_location (kernels), + "End of kernel region unreachable"); + } + + vec<basic_block> excludes + = 
get_all_dominated_blocks (CDI_DOMINATORS, end_region); + + unsigned di2; + basic_block exclude; + + FOR_EACH_VEC_ELT (excludes, di2, exclude) + if (exclude != end_region) + bitmap_set_bit (excludes_bitmap, exclude->index); + + FOR_EACH_VEC_ELT (dominated, di, dom) + if (!bitmap_bit_p (excludes_bitmap, dom->index)) + bitmap_set_bit (region_bitmap, dom->index); + + if (bitmap_bit_p (region_bitmap, loop->header->index)) + { + if (region_entry != NULL) + *region_entry = bb; + if (region_exit != NULL) + *region_exit = end_region; + return true; + } + } + + return false; +} + #include "gt-omp-low.h" diff --git gcc/omp-low.h gcc/omp-low.h index 3d30c3b..ae63c9f 100644 --- gcc/omp-low.h +++ gcc/omp-low.h @@ -29,6 +29,8 @@ extern tree omp_reduction_init (tree, tree); extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *); extern void omp_finish_file (void); extern bool gimple_stmt_omp_data_i_init_p (gimple); +extern bool loop_in_oacc_kernels_region_p (struct loop *, basic_block *, + basic_block *); extern GTY(()) vec<tree, va_gc> *offload_funcs; extern GTY(()) vec<tree, va_gc> *offload_vars; diff --git gcc/passes.def gcc/passes.def index 854c5b8..5cdbc87 100644 --- gcc/passes.def +++ gcc/passes.def @@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see function. 
*/ NEXT_PASS (pass_oacc_kernels); PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels) + NEXT_PASS (pass_ch_oacc_kernels); NEXT_PASS (pass_expand_omp_ssa); POP_INSERT_PASSES () NEXT_PASS (pass_merge_phi); diff --git gcc/tree-pass.h gcc/tree-pass.h index 35778f2..321229a 100644 --- gcc/tree-pass.h +++ gcc/tree-pass.h @@ -379,6 +379,7 @@ extern gimple_opt_pass *make_pass_loop_prefetch (gcc::context *ctxt); extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_ch_oacc_kernels (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ccp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_phi_only_cprop (gcc::context *ctxt); extern gimple_opt_pass *make_pass_build_ssa (gcc::context *ctxt); diff --git gcc/tree-ssa-loop-ch.c gcc/tree-ssa-loop-ch.c index d759de7..5f24bcb 100644 --- gcc/tree-ssa-loop-ch.c +++ gcc/tree-ssa-loop-ch.c @@ -54,12 +54,15 @@ along with GCC; see the file COPYING3. If not see #include "tree-inline.h" #include "flags.h" #include "tree-ssa-threadedge.h" +#include "omp-low.h" /* Duplicates headers of loops if they are small enough, so that the statements in the loop body are always executed when the loop is entered. This increases effectiveness of code motion optimizations, and reduces the need for loop preconditioning. */ +static unsigned int pass_ch_execute (function *, bool); + /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT instructions should be duplicated, limit is decreased by the actual amount. 
*/ @@ -178,6 +181,14 @@ public: unsigned int pass_ch::execute (function *fun) { + return pass_ch_execute (fun, false); +} + +} // anon namespace + +static unsigned int +pass_ch_execute (function *fun, bool oacc_kernels_p) +{ struct loop *loop; basic_block header; edge exit, entry; @@ -211,6 +222,10 @@ pass_ch::execute (function *fun) if (do_while_loop_p (loop)) continue; + if (oacc_kernels_p + && !loop_in_oacc_kernels_region_p (loop, NULL, NULL)) + continue; + /* Iterate the header copying up to limit; this takes care of the cases like while (a && b) {...}, where we want to have both of the conditions copied. TODO -- handle while (a || b) - like cases, by not requiring @@ -301,10 +316,50 @@ pass_ch::execute (function *fun) return 0; } -} // anon namespace - gimple_opt_pass * make_pass_ch (gcc::context *ctxt) { return new pass_ch (ctxt); } + +namespace { + +const pass_data pass_data_ch_oacc_kernels = +{ + GIMPLE_PASS, /* type */ + "ch_oacc_kernels", /* name */ + OPTGROUP_LOOP, /* optinfo_flags */ + TV_TREE_CH, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_cleanup_cfg, /* todo_flags_finish */ +}; + + class pass_ch_oacc_kernels : public gimple_opt_pass +{ +public: + pass_ch_oacc_kernels (gcc::context *ctxt) + : gimple_opt_pass (pass_data_ch_oacc_kernels, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) { return true; } + virtual unsigned int execute (function *); + +}; // class pass_ch_oacc_kernels + +unsigned int +pass_ch_oacc_kernels::execute (function *fun) +{ + return pass_ch_execute (fun, true); +} + +} // anon namespace + +gimple_opt_pass * +make_pass_ch_oacc_kernels (gcc::context *ctxt) +{ + return new pass_ch_oacc_kernels (ctxt); +} Grüße, Thomas
signature.asc
Description: PGP signature