On 09/11/15 16:35, Tom de Vries wrote:
Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

      1    Insert new exit block only when needed in
         transform_to_exit_first_loop_alt
      2    Make create_parallel_loop return void
      3    Ignore reduction clause on kernels directive
      4    Implement -foffload-alias
      5    Add in_oacc_kernels_region in struct loop
      6    Add pass_oacc_kernels
      7    Add pass_dominator_oacc_kernels
      8    Add pass_ch_oacc_kernels
      9    Add pass_parallelize_loops_oacc_kernels
     10    Add pass_oacc_kernels pass group in passes.def
     11    Update testcases after adding kernels pass group
     12    Handle acc loop directive
     13    Add c-c++-common/goacc/kernels-*.c
     14    Add gfortran.dg/goacc/kernels-*.f95
     15    Add libgomp.oacc-c-c++-common/kernels-*.c
     16    Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Built and reg-tested with an nvidia accelerator, in combination with a
patch that enables accelerator testing (submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.

This patch deals with loops in an oacc kernels region that are
annotated with "#pragma acc loop".  It expands such a loop as a normal
sequential loop, which has the effect of ignoring the "#pragma acc
loop" directive.
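
For instance, given a kernels region like this (a minimal hypothetical
example, not taken from the testsuite):

  #pragma acc kernels
  {
  #pragma acc loop
    for (i = 0; i < n; i++)
      c[i] = a[i] + b[i];
  }

the annotated loop is expanded as if the "#pragma acc loop" line were
absent, leaving any parallelization to the kernels pass group added
earlier in this series.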

Thanks,
- Tom

Handle acc loop directive

2015-11-09  Tom de Vries  <t...@codesourcery.com>

	* omp-low.c (struct omp_region): Add inside_kernels_p field.
	(expand_omp_for_generic): Don't mark istart0 and iend0
	addressable unless necessary.  Adjust to generate a 'sequential'
	loop when the GOMP builtin arguments are BUILT_IN_NONE.
	(expand_omp_for): Use expand_omp_for_generic() to generate a
	non-parallelized loop for OMP_FORs inside OpenACC kernels regions.
	(expand_omp): Set the inside_kernels_p field for regions
	nested inside OpenACC kernels constructs.
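
In other words, for the seq_loop case the generated control flow
reduces to something like this (a rough sketch based on the hunks
below, not literal generated code):

	istart0 = N1;
	iend0 = N2;
	if (istart0 != iend0)
	  for (V = istart0; V < iend0; V += STEP)
	    BODY;

instead of the usual variant that iterates over chunks handed out by
the GOMP_loop_*_start/next runtime calls and finishes with a call to
GOMP_loop_end.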
---
 gcc/omp-low.c | 127 ++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 87 insertions(+), 40 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 1283cc7..859a2eb 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -136,6 +136,9 @@ struct omp_region
   /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
      a depend clause.  */
   gomp_ordered *ord_stmt;
+
+  /* True if this is nested inside an OpenACC kernels construct.  */
+  bool inside_kernels_p;
 };
 
 /* Context structure.  Used to store information about each parallel
@@ -8238,6 +8241,7 @@ expand_omp_for_generic (struct omp_region *region,
   gassign *assign_stmt;
   bool in_combined_parallel = is_combined_parallel (region);
   bool broken_loop = region->cont == NULL;
+  bool seq_loop = (start_fn == BUILT_IN_NONE || next_fn == BUILT_IN_NONE);
   edge e, ne;
   tree *counts = NULL;
   int i;
@@ -8335,8 +8339,12 @@ expand_omp_for_generic (struct omp_region *region,
   type = TREE_TYPE (fd->loop.v);
   istart0 = create_tmp_var (fd->iter_type, ".istart0");
   iend0 = create_tmp_var (fd->iter_type, ".iend0");
-  TREE_ADDRESSABLE (istart0) = 1;
-  TREE_ADDRESSABLE (iend0) = 1;
+
+  if (!seq_loop)
+    {
+      TREE_ADDRESSABLE (istart0) = 1;
+      TREE_ADDRESSABLE (iend0) = 1;
+    }
 
   /* See if we need to bias by LLONG_MIN.  */
   if (fd->iter_type == long_long_unsigned_type_node
@@ -8366,7 +8374,20 @@ expand_omp_for_generic (struct omp_region *region,
   gsi_prev (&gsif);
 
   tree arr = NULL_TREE;
-  if (in_combined_parallel)
+  if (seq_loop)
+    {
+      tree n1 = fold_convert (fd->iter_type, fd->loop.n1);
+      tree n2 = fold_convert (fd->iter_type, fd->loop.n2);
+
+      assign_stmt = gimple_build_assign (istart0, n1);
+      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+
+      assign_stmt = gimple_build_assign (iend0, n2);
+      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+
+      t = fold_build2 (NE_EXPR, boolean_type_node, istart0, iend0);
+    }
+  else if (in_combined_parallel)
     {
       gcc_assert (fd->ordered == 0);
       /* In a combined parallel loop, emit a call to
@@ -8788,39 +8809,45 @@ expand_omp_for_generic (struct omp_region *region,
 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
 
       /* Emit code to get the next parallel iteration in L2_BB.  */
-      gsi = gsi_start_bb (l2_bb);
+      if (!seq_loop)
+	{
+	  gsi = gsi_start_bb (l2_bb);
 
-      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
-			   build_fold_addr_expr (istart0),
-			   build_fold_addr_expr (iend0));
-      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
-				    false, GSI_CONTINUE_LINKING);
-      if (TREE_TYPE (t) != boolean_type_node)
-	t = fold_build2 (NE_EXPR, boolean_type_node,
-			 t, build_int_cst (TREE_TYPE (t), 0));
-      gcond *cond_stmt = gimple_build_cond_empty (t);
-      gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
+	  t = build_call_expr (builtin_decl_explicit (next_fn), 2,
+			       build_fold_addr_expr (istart0),
+			       build_fold_addr_expr (iend0));
+	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+					false, GSI_CONTINUE_LINKING);
+	  if (TREE_TYPE (t) != boolean_type_node)
+	    t = fold_build2 (NE_EXPR, boolean_type_node,
+			     t, build_int_cst (TREE_TYPE (t), 0));
+	  gcond *cond_stmt = gimple_build_cond_empty (t);
+	  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
+	}
     }
 
   /* Add the loop cleanup function.  */
   gsi = gsi_last_bb (exit_bb);
-  if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
-    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
-  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
-    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
-  else
-    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
-  gcall *call_stmt = gimple_build_call (t, 0);
-  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
-    gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
-  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
-  if (fd->ordered)
+  if (!seq_loop)
     {
-      tree arr = counts[fd->ordered];
-      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
-      TREE_THIS_VOLATILE (clobber) = 1;
-      gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
-			GSI_SAME_STMT);
+      if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
+	t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
+      else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
+	t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
+      else
+	t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
+      gcall *call_stmt = gimple_build_call (t, 0);
+      if (gimple_omp_return_lhs (gsi_stmt (gsi)))
+	gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
+      gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
+      if (fd->ordered)
+	{
+	  tree arr = counts[fd->ordered];
+	  tree clobber = build_constructor (TREE_TYPE (arr), NULL);
+	  TREE_THIS_VOLATILE (clobber) = 1;
+	  gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
+			    GSI_SAME_STMT);
+	}
     }
   gsi_remove (&gsi, true);
 
@@ -8833,7 +8860,9 @@ expand_omp_for_generic (struct omp_region *region,
       gimple_seq phis;
 
       e = find_edge (cont_bb, l3_bb);
-      ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
+      ne = make_edge (l2_bb, l3_bb, (seq_loop
+				     ? EDGE_FALLTHRU
+				     : EDGE_FALSE_VALUE));
 
       phis = phi_nodes (l3_bb);
       for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -8873,7 +8902,8 @@ expand_omp_for_generic (struct omp_region *region,
 	  e = find_edge (cont_bb, l2_bb);
 	  e->flags = EDGE_FALLTHRU;
 	}
-      make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
+      if (!seq_loop)
+	make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
 
       if (gimple_in_ssa_p (cfun))
 	{
@@ -8929,12 +8959,16 @@ expand_omp_for_generic (struct omp_region *region,
 
       add_bb_to_loop (l2_bb, outer_loop);
 
-      /* We've added a new loop around the original loop.  Allocate the
-	 corresponding loop struct.  */
-      struct loop *new_loop = alloc_loop ();
-      new_loop->header = l0_bb;
-      new_loop->latch = l2_bb;
-      add_loop (new_loop, outer_loop);
+      struct loop *new_loop = NULL;
+      if (!seq_loop)
+	{
+	  /* We've added a new loop around the original loop.  Allocate the
+	     corresponding loop struct.  */
+	  new_loop = alloc_loop ();
+	  new_loop->header = l0_bb;
+	  new_loop->latch = l2_bb;
+	  add_loop (new_loop, outer_loop);
+	}
 
       /* Allocate a loop structure for the original loop unless we already
 	 had one.  */
@@ -8944,7 +8978,9 @@ expand_omp_for_generic (struct omp_region *region,
 	  struct loop *orig_loop = alloc_loop ();
 	  orig_loop->header = l1_bb;
 	  /* The loop may have multiple latches.  */
-	  add_loop (orig_loop, new_loop);
+	  add_loop (orig_loop, (new_loop != NULL
+				? new_loop
+				: outer_loop));
 	}
     }
 }
@@ -11348,7 +11384,10 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
        original loops from being detected.  Fix that up.  */
     loops_state_set (LOOPS_NEED_FIXUP);
 
-  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
+  if (region->inside_kernels_p)
+    expand_omp_for_generic (region, &fd, BUILT_IN_NONE, BUILT_IN_NONE,
+			    inner_stmt);
+  else if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
     expand_omp_simd (region, &fd);
   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
     expand_cilk_for (region, &fd);
@@ -13030,6 +13069,14 @@ expand_omp (struct omp_region *region)
       if (region->type == GIMPLE_OMP_PARALLEL)
 	determine_parallel_type (region);
 
+      if (region->type == GIMPLE_OMP_TARGET && region->inner)
+	{
+	  gomp_target *entry = as_a <gomp_target *> (last_stmt (region->entry));
+	  if (gimple_omp_target_kind (entry) == GF_OMP_TARGET_KIND_OACC_KERNELS
+	      || region->inside_kernels_p)
+	    region->inner->inside_kernels_p = true;
+	}
+
       if (region->type == GIMPLE_OMP_FOR
 	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
 	inner_stmt = last_stmt (region->inner->entry);
-- 
1.9.1
