On 05/11/15 11:16, Tom de Vries wrote:
Hi,
now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.
So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.
The following patch series implements that:
1 Move expansion of kernels region back to first omp-expand
2 Update gate_oacc_kernels to handle oacc function
3 Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
4 Revert "Add pass_dominator::sese_mode_p ()"
5 Handle oacc function in parloops
6 Update goacc kernels C testcases
7 Update goacc kernels Fortran testcases
8 Release_defs in expand_omp_atomic_fetch_op
9 Remove BUILT_IN_GOACC_KERNELS_INTERNAL
[ The patch series is broken up into logical bits, but is intended as a
single commit. Various things in kernels support will be broken in
intermediate stages. ]
Committed to gomp-4_0-branch.
I'll post the patches in reply to this message.
This patch removes handling of kernels regions in tree-parloops.c, and
adds handling of oacc functions that used to be kernels regions before
they were split off.
That means we no longer add a parallel pragma. OTOH, we now have to
clear PROP_gimple_eomp in order to trigger the subsequent omp-expand pass.
Thanks,
- Tom
Handle oacc function in parloops
2015-11-04 Tom de Vries <t...@codesourcery.com>
* omp-low.c (set_oacc_fn_attrib): Remove static.
* omp-low.h (set_oacc_fn_attrib): Declare.
* tree-parloops.c (create_parallel_loop): Remove region_entry parameter.
Remove handling of oacc kernels pragma and GOACC_kernels_internal call.
Remove insertion of oacc parallel pragma. Set oacc function attributes.
(gen_parallel_loop): Remove region_entry parameter.
(get_omp_data_i_param): New function.
(try_create_reduction_list): Use get_omp_data_i_param instead of
gimple_stmt_omp_data_i_init_p.
(ref_conflicts_with_region): Handle GIMPLE_RETURN.
(oacc_entry_exit_ok_1): Same. Add missing is_gimple_call test before
gimple_call_internal_p test.
(oacc_entry_exit_ok): Remove region_entry parameter. Use
get_omp_data_i_param instead of get_omp_data_i. Set region_bbs to all
bbs in function. Use function entry as region entry.
(parallelize_loops): Allow oacc functions and parallelized function if
oacc_kernels_p. Remove region_entry variable.
(pass_parallelize_loops_oacc_kernels::execute): Clear PROP_gimple_eomp
if a loop was parallelized.
---
gcc/omp-low.c | 2 +-
gcc/omp-low.h | 1 +
gcc/tree-parloops.c | 119 ++++++++++++++++++++++------------------------------
3 files changed, 51 insertions(+), 71 deletions(-)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index ac8c8d0..58cb959 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12456,7 +12456,7 @@ replace_oacc_fn_attrib (tree fn, tree dims)
function attribute. Push any that are non-constant onto the ARGS
list, along with an appropriate GOMP_LAUNCH_DIM tag. */
-static void
+void
set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
{
/* Must match GOMP_DIM ordering. */
diff --git a/gcc/omp-low.h b/gcc/omp-low.h
index 7c9efdc..673b470 100644
--- a/gcc/omp-low.h
+++ b/gcc/omp-low.h
@@ -40,6 +40,7 @@ extern vec<basic_block> get_bbs_in_oacc_kernels_region (basic_block,
extern void replace_oacc_fn_attrib (tree, tree);
extern tree build_oacc_routine_dims (tree);
extern tree get_oacc_fn_attrib (tree);
+extern void set_oacc_fn_attrib (tree, tree, vec<tree> *);
extern GTY(()) vec<tree, va_gc> *offload_funcs;
extern GTY(()) vec<tree, va_gc> *offload_vars;
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index f14cf8a..c038dfe 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2017,7 +2017,7 @@ transform_to_exit_first_loop (struct loop *loop,
static void
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
tree new_data, unsigned n_threads, location_t loc,
- basic_block region_entry, bool oacc_kernels_p)
+ bool oacc_kernels_p)
{
gimple_stmt_iterator gsi;
basic_block bb, paral_bb, for_bb, ex_bb, continue_bb;
@@ -2039,10 +2039,6 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
paral_bb = single_pred (bb);
gsi = gsi_last_bb (paral_bb);
}
- else
- /* Make sure the oacc parallel is inserted on top of the oacc kernels
- region. */
- gsi = gsi_last_bb (region_entry);
if (!oacc_kernels_p)
{
@@ -2056,50 +2052,10 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
}
else
{
- /* Create oacc parallel pragma based on oacc kernels pragma and
- GOACC_kernels_internal call. */
- gomp_target *kernels = as_a <gomp_target *> (gsi_stmt (gsi));
-
- tree clauses = gimple_omp_target_clauses (kernels);
- /* FIXME: We need a more intelligent mapping onto vector, gangs,
- workers. */
- if (1)
- {
- tree clause = build_omp_clause (gimple_location (kernels),
- OMP_CLAUSE_NUM_GANGS);
- OMP_CLAUSE_NUM_GANGS_EXPR (clause)
- = build_int_cst (integer_type_node, n_threads);
- OMP_CLAUSE_CHAIN (clause) = clauses;
- clauses = clause;
- }
- gomp_target *stmt
- = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_PARALLEL,
- clauses);
- tree child_fn = gimple_omp_target_child_fn (kernels);
- gimple_omp_target_set_child_fn (stmt, child_fn);
- tree data_arg = gimple_omp_target_data_arg (kernels);
- gimple_omp_target_set_data_arg (stmt, data_arg);
-
- gimple_set_location (stmt, loc);
-
- /* Insert oacc parallel pragma after the oacc kernels pragma. */
- {
- gimple_stmt_iterator gsi2;
- gsi = gsi_last_bb (region_entry);
- gsi2 = gsi;
- gsi_prev (&gsi2);
-
- /* Insert pragma acc parallel. */
- gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
-
- /* Remove GOACC_kernels_internal call. */
- replace_uses_by (gimple_vdef (gsi_stmt (gsi2)),
- gimple_vuse (gsi_stmt (gsi2)));
- gsi_remove (&gsi2, true);
-
- /* Remove pragma acc kernels. */
- gsi_remove (&gsi2, true);
- }
+ tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
+ OMP_CLAUSE_NUM_GANGS_EXPR (clause)
+ = build_int_cst (integer_type_node, n_threads);
+ set_oacc_fn_attrib (cfun->decl, clause, NULL);
}
/* Initialize NEW_DATA. */
@@ -2274,7 +2230,7 @@ static void
gen_parallel_loop (struct loop *loop,
reduction_info_table_type *reduction_list,
unsigned n_threads, struct tree_niter_desc *niter,
- basic_block region_entry, bool oacc_kernels_p)
+ bool oacc_kernels_p)
{
tree many_iterations_cond, type, nit;
tree arg_struct, new_arg_struct;
@@ -2457,7 +2413,7 @@ gen_parallel_loop (struct loop *loop,
if (cond_stmt)
loc = gimple_location (cond_stmt);
create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
- n_threads, loc, region_entry, oacc_kernels_p);
+ n_threads, loc, oacc_kernels_p);
if (reduction_list->elements () > 0)
create_call_for_reduction (loop, reduction_list, &clsn_data);
@@ -2650,6 +2606,22 @@ try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
return true;
}
+static tree
+get_omp_data_i_param (void)
+{
+ tree decl = DECL_ARGUMENTS (cfun->decl);
+ gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
+ for (unsigned int i = 0; i < num_ssa_names; ++i)
+ {
+ tree name = ssa_name (i);
+ if (name != NULL_TREE
+ && SSA_NAME_VAR (name) == decl)
+ return name;
+ }
+
+ gcc_unreachable ();
+}
+
/* Try to initialize REDUCTION_LIST for code generation part.
REDUCTION_LIST describes the reductions. */
@@ -2795,7 +2767,7 @@ try_create_reduction_list (loop_p loop,
return false;
addr2 = TREE_OPERAND (addr2, 0);
if (TREE_CODE (addr2) != SSA_NAME
- || !gimple_stmt_omp_data_i_init_p (SSA_NAME_DEF_STMT (addr2)))
+ || addr2 != get_omp_data_i_param ())
return false;
red->reduc_addr = addr;
}
@@ -2849,6 +2821,9 @@ ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
&& !gimple_vuse (stmt))
continue;
+ if (gimple_code (stmt) == GIMPLE_RETURN)
+ continue;
+
if (ref_is_store)
{
if (dead_load_p (stmt))
@@ -2989,9 +2964,12 @@ oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
&& !gimple_vdef (stmt)
&& !gimple_vuse (stmt))
continue;
- else if (gimple_call_internal_p (stmt)
+ else if (is_gimple_call (stmt)
+ && gimple_call_internal_p (stmt)
&& gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS)
continue;
+ else if (gimple_code (stmt) == GIMPLE_RETURN)
+ continue;
else
{
if (dump_file)
@@ -3119,19 +3097,17 @@ oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
}
static bool
-oacc_entry_exit_ok (struct loop *loop, basic_block region_entry,
+oacc_entry_exit_ok (struct loop *loop,
reduction_info_table_type *reduction_list)
{
basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
- basic_block region_exit
- = get_oacc_kernels_region_exit (single_succ (region_entry));
- vec<basic_block> region_bbs
- = get_bbs_in_oacc_kernels_region (region_entry, region_exit);
- tree omp_data_i = get_omp_data_i (region_entry);
+ tree omp_data_i = get_omp_data_i_param ();
gcc_assert (omp_data_i != NULL_TREE);
+ vec<basic_block> region_bbs
+ = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
- gimple_stmt_iterator gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (omp_data_i));
- gsi_next_nondebug (&gsi);
+ gimple_stmt_iterator gsi
+ = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
gimple *stmt = gsi_stmt (gsi);
gcc_assert (gimple_call_internal_p (stmt)
&& gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS);
@@ -3182,15 +3158,16 @@ parallelize_loops (bool oacc_kernels_p)
struct obstack parloop_obstack;
HOST_WIDE_INT estimated;
source_location loop_loc;
- basic_block region_entry = NULL;
/* Do not parallelize loops in the functions created by parallelization. */
- if (parallelized_function_p (cfun->decl))
+ if (!oacc_kernels_p
+ && parallelized_function_p (cfun->decl))
return false;
/* Do not parallelize loops in offloaded functions. */
- if (get_oacc_fn_attrib (cfun->decl) != NULL)
- return false;
+ if (!oacc_kernels_p
+ && get_oacc_fn_attrib (cfun->decl) != NULL)
+ return false;
if (cfun->has_nonlocal_label)
return false;
@@ -3231,8 +3208,6 @@ parallelize_loops (bool oacc_kernels_p)
fprintf (dump_file,
"Trying loop %d with header bb %d in oacc kernels region\n",
loop->num, loop->header->index);
-
- region_entry = loop_get_oacc_kernels_region_entry (loop);
}
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -3309,7 +3284,7 @@ parallelize_loops (bool oacc_kernels_p)
}
if (oacc_kernels_p
- && !oacc_entry_exit_ok (loop, region_entry, &reduction_list))
+ && !oacc_entry_exit_ok (loop, &reduction_list))
{
if (dump_file)
fprintf (dump_file, "entry/exit not ok: FAILED\n");
@@ -3332,7 +3307,7 @@ parallelize_loops (bool oacc_kernels_p)
}
gen_parallel_loop (loop, &reduction_list,
- n_threads, &niter_desc, region_entry, oacc_kernels_p);
+ n_threads, &niter_desc, oacc_kernels_p);
}
obstack_free (&parloop_obstack, NULL);
@@ -3437,7 +3412,11 @@ pass_parallelize_loops_oacc_kernels::execute (function *fun)
return 0;
if (parallelize_loops (true))
- return TODO_update_ssa;
+ {
+ fun->curr_properties &= ~(PROP_gimple_eomp);
+
+ return TODO_update_ssa;
+ }
return 0;
}
--
1.9.1